Skip to content

Commit

Permalink
Adding a user comparator for comparing Uint64 slices.
Browse files Browse the repository at this point in the history
Summary:
- New Uint64 comparator
- Modify Reader and Builder to take custom user comparators instead of bytewise comparator
- Modify logic for choosing unused user key in builder
- Modify iterator logic in reader
- test changes

Test Plan:
cuckoo_table_{builder,reader,db}_test
make check all

Reviewers: ljin, sdong

Reviewed By: ljin

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D22377
  • Loading branch information
adyarshyam committed Aug 27, 2014
1 parent 1913ce2 commit 4142a3e
Show file tree
Hide file tree
Showing 12 changed files with 243 additions and 105 deletions.
34 changes: 32 additions & 2 deletions db/cuckoo_table_db_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,6 @@ TEST(CuckooTableDBTest, Flush) {
ASSERT_EQ("v2", Get("key2"));
ASSERT_EQ("v3", Get("key3"));
ASSERT_EQ("NOT_FOUND", Get("key4"));
ASSERT_EQ("Invalid argument: Length of key is invalid.", Get("somelongkey"));
ASSERT_EQ("Invalid argument: Length of key is invalid.", Get("s"));

// Now add more keys and flush.
ASSERT_OK(Put("key4", "v4"));
Expand Down Expand Up @@ -195,6 +193,38 @@ static std::string Key(int i) {
snprintf(buf, sizeof(buf), "key_______%06d", i);
return std::string(buf);
}
static std::string Uint64Key(uint64_t i) {
std::string str;
str.resize(8);
memcpy(&str[0], static_cast<void*>(&i), 8);
return str;
}
} // namespace.

TEST(CuckooTableDBTest, Uint64Comparator) {
Options options = CurrentOptions();
options.comparator = test::Uint64Comparator();
Reopen(&options);

ASSERT_OK(Put(Uint64Key(1), "v1"));
ASSERT_OK(Put(Uint64Key(2), "v2"));
ASSERT_OK(Put(Uint64Key(3), "v3"));
dbfull()->TEST_FlushMemTable();

ASSERT_EQ("v1", Get(Uint64Key(1)));
ASSERT_EQ("v2", Get(Uint64Key(2)));
ASSERT_EQ("v3", Get(Uint64Key(3)));
ASSERT_EQ("NOT_FOUND", Get(Uint64Key(4)));

// Add more keys.
ASSERT_OK(Delete(Uint64Key(2))); // Delete.
ASSERT_OK(Put(Uint64Key(3), "v0")); // Update.
ASSERT_OK(Put(Uint64Key(4), "v4"));
dbfull()->TEST_FlushMemTable();
ASSERT_EQ("v1", Get(Uint64Key(1)));
ASSERT_EQ("NOT_FOUND", Get(Uint64Key(2)));
ASSERT_EQ("v0", Get(Uint64Key(3)));
ASSERT_EQ("v4", Get(Uint64Key(4)));
}

TEST(CuckooTableDBTest, CompactionTrigger) {
Expand Down
8 changes: 8 additions & 0 deletions db/db_bench.cc
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ DEFINE_int32(duration, 0, "Time in seconds for the random-ops tests to run."

DEFINE_int32(value_size, 100, "Size of each value");

DEFINE_bool(use_uint64_comparator, false, "use Uint64 user comparator");

static bool ValidateKeySize(const char* flagname, int32_t value) {
return true;
Expand Down Expand Up @@ -1650,6 +1651,13 @@ class Benchmark {
options.prefix_extractor.reset(
NewFixedPrefixTransform(FLAGS_prefix_size));
}
if (FLAGS_use_uint64_comparator) {
options.comparator = test::Uint64Comparator();
if (FLAGS_key_size != 8) {
fprintf(stderr, "Using Uint64 comparator but key size is not 8.\n");
exit(1);
}
}
options.memtable_prefix_bloom_bits = FLAGS_memtable_bloom_bits;
options.bloom_locality = FLAGS_bloom_locality;
options.max_open_files = FLAGS_open_files;
Expand Down
66 changes: 35 additions & 31 deletions table/cuckoo_table_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;
CuckooTableBuilder::CuckooTableBuilder(
WritableFile* file, double hash_table_ratio,
uint32_t max_num_hash_table, uint32_t max_search_depth,
const Comparator* user_comparator,
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t))
: num_hash_table_(2),
file_(file),
Expand All @@ -47,6 +48,7 @@ CuckooTableBuilder::CuckooTableBuilder(
max_search_depth_(max_search_depth),
is_last_level_file_(false),
has_seen_first_key_(false),
ucomp_(user_comparator),
get_slice_hash_(get_slice_hash),
closed_(false) {
properties_.num_entries = 0;
Expand All @@ -73,6 +75,8 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
if (!has_seen_first_key_) {
is_last_level_file_ = ikey.sequence == 0;
has_seen_first_key_ = true;
smallest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size());
largest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size());
}
// Even if one sequence number is non-zero, then it is not last level.
assert(!is_last_level_file_ || ikey.sequence == 0);
Expand All @@ -82,23 +86,17 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
} else {
kvs_.emplace_back(std::make_pair(key.ToString(), value.ToString()));
}

properties_.num_entries++;

// We assume that the keys are inserted in sorted order as determined by
// Byte-wise comparator. To identify an unused key, which will be used in
// filling empty buckets in the table, we try to find gaps between successive
// keys inserted (ie, latest key and previous in kvs_).
if (unused_user_key_.empty() && kvs_.size() > 1) {
std::string prev_key = is_last_level_file_ ? kvs_[kvs_.size()-1].first
: ExtractUserKey(kvs_[kvs_.size()-1].first).ToString();
std::string new_user_key = prev_key;
new_user_key.back()++;
// We ignore carry-overs and check that it is larger than previous key.
if (Slice(new_user_key).compare(Slice(prev_key)) > 0 &&
Slice(new_user_key).compare(ikey.user_key) < 0) {
unused_user_key_ = new_user_key;
}
// In order to fill the empty buckets in the hash table, we identify a
// key which is not used so far (unused_user_key). We determine this by
// maintaining smallest and largest keys inserted so far in bytewise order
// and use them to find a key outside this range in Finish() operation.
// Note that this strategy is independent of user comparator used here.
if (ikey.user_key.compare(smallest_user_key_) < 0) {
smallest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size());
} else if (ikey.user_key.compare(largest_user_key_) > 0) {
largest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size());
}
}

Expand All @@ -119,7 +117,7 @@ Status CuckooTableBuilder::MakeHashTable(std::vector<CuckooBucket>* buckets) {
bucket_found = true;
break;
} else {
if (user_key.compare(is_last_level_file_
if (ucomp_->Compare(user_key, is_last_level_file_
? Slice(kvs_[(*buckets)[hash_val].vector_idx].first)
: ExtractUserKey(
kvs_[(*buckets)[hash_val].vector_idx].first)) == 0) {
Expand Down Expand Up @@ -160,31 +158,37 @@ Status CuckooTableBuilder::Finish() {
if (!s.ok()) {
return s;
}
if (unused_user_key_.empty() && !kvs_.empty()) {
// Try to find the key next to last key by handling carryovers.
std::string last_key =
is_last_level_file_ ? kvs_[kvs_.size()-1].first
: ExtractUserKey(kvs_[kvs_.size()-1].first).ToString();
std::string new_user_key = last_key;
int curr_pos = new_user_key.size() - 1;
// Determine unused_user_key to fill empty buckets.
std::string unused_bucket;
if (!kvs_.empty()) {
std::string unused_user_key = smallest_user_key_;
int curr_pos = unused_user_key.size() - 1;
while (curr_pos >= 0) {
++new_user_key[curr_pos];
if (new_user_key > last_key) {
unused_user_key_ = new_user_key;
--unused_user_key[curr_pos];
if (Slice(unused_user_key).compare(smallest_user_key_) < 0) {
break;
}
--curr_pos;
}
if (curr_pos < 0) {
// Try using the largest key to identify an unused key.
unused_user_key = largest_user_key_;
curr_pos = unused_user_key.size() - 1;
while (curr_pos >= 0) {
++unused_user_key[curr_pos];
if (Slice(unused_user_key).compare(largest_user_key_) > 0) {
break;
}
--curr_pos;
}
}
if (curr_pos < 0) {
return Status::Corruption("Unable to find unused key");
}
}
std::string unused_bucket;
if (!kvs_.empty()) {
if (is_last_level_file_) {
unused_bucket = unused_user_key_;
unused_bucket = unused_user_key;
} else {
ParsedInternalKey ikey(unused_user_key_, 0, kTypeValue);
ParsedInternalKey ikey(unused_user_key, 0, kTypeValue);
AppendInternalKey(&unused_bucket, ikey);
}
}
Expand Down
6 changes: 4 additions & 2 deletions table/cuckoo_table_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class CuckooTableBuilder: public TableBuilder {
public:
CuckooTableBuilder(
WritableFile* file, double hash_table_ratio, uint32_t max_num_hash_table,
uint32_t max_search_depth,
uint32_t max_search_depth, const Comparator* user_comparator,
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t));

// REQUIRES: Either Finish() or Abandon() has been called.
Expand Down Expand Up @@ -83,9 +83,11 @@ class CuckooTableBuilder: public TableBuilder {
std::vector<std::pair<std::string, std::string>> kvs_;
TableProperties properties_;
bool has_seen_first_key_;
const Comparator* ucomp_;
uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index,
uint64_t max_num_buckets);
std::string unused_user_key_ = "";
std::string largest_user_key_ = "";
std::string smallest_user_key_ = "";

bool closed_; // Either Finish() or Abandon() has been called.

Expand Down
30 changes: 15 additions & 15 deletions table/cuckoo_table_builder_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ TEST(CuckooBuilderTest, SuccessWithEmptyFile) {
fname = test::TmpDir() + "/NoCollisionFullKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
4, 100, GetSliceHash);
4, 100, BytewiseComparator(), GetSliceHash);
ASSERT_OK(builder.status());
ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close());
Expand All @@ -146,7 +146,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
fname = test::TmpDir() + "/NoCollisionFullKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 100, GetSliceHash);
num_hash_fun, 100, BytewiseComparator(), GetSliceHash);
ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(keys[i]), Slice(values[i]));
Expand All @@ -157,7 +157,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
ASSERT_OK(writable_file->Close());

uint32_t expected_max_buckets = keys.size() / kHashTableRatio;
std::string expected_unused_bucket = GetInternalKey("key05", true);
std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(keys, values, expected_locations,
expected_unused_bucket, expected_max_buckets, 2, false);
Expand All @@ -183,7 +183,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
fname = test::TmpDir() + "/WithCollisionFullKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 100, GetSliceHash);
num_hash_fun, 100, BytewiseComparator(), GetSliceHash);
ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(keys[i]), Slice(values[i]));
Expand All @@ -194,7 +194,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
ASSERT_OK(writable_file->Close());

uint32_t expected_max_buckets = keys.size() / kHashTableRatio;
std::string expected_unused_bucket = GetInternalKey("key05", true);
std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(keys, values, expected_locations,
expected_unused_bucket, expected_max_buckets, 4, false);
Expand Down Expand Up @@ -225,7 +225,7 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKey) {
fname = test::TmpDir() + "/WithCollisionPathFullKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 100, GetSliceHash);
num_hash_fun, 100, BytewiseComparator(), GetSliceHash);
ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(keys[i]), Slice(values[i]));
Expand All @@ -236,7 +236,7 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKey) {
ASSERT_OK(writable_file->Close());

uint32_t expected_max_buckets = keys.size() / kHashTableRatio;
std::string expected_unused_bucket = GetInternalKey("key06", true);
std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(keys, values, expected_locations,
expected_unused_bucket, expected_max_buckets, 2, false);
Expand All @@ -258,7 +258,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
fname = test::TmpDir() + "/NoCollisionUserKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 100, GetSliceHash);
num_hash_fun, 100, BytewiseComparator(), GetSliceHash);
ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
Expand All @@ -269,7 +269,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
ASSERT_OK(writable_file->Close());

uint32_t expected_max_buckets = user_keys.size() / kHashTableRatio;
std::string expected_unused_bucket = "key05";
std::string expected_unused_bucket = "key00";
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(user_keys, values, expected_locations,
expected_unused_bucket, expected_max_buckets, 2, true);
Expand All @@ -291,7 +291,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
fname = test::TmpDir() + "/WithCollisionUserKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 100, GetSliceHash);
num_hash_fun, 100, BytewiseComparator(), GetSliceHash);
ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
Expand All @@ -302,7 +302,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
ASSERT_OK(writable_file->Close());

uint32_t expected_max_buckets = user_keys.size() / kHashTableRatio;
std::string expected_unused_bucket = "key05";
std::string expected_unused_bucket = "key00";
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(user_keys, values, expected_locations,
expected_unused_bucket, expected_max_buckets, 4, true);
Expand All @@ -326,7 +326,7 @@ TEST(CuckooBuilderTest, WithCollisionPathUserKey) {
fname = test::TmpDir() + "/WithCollisionPathUserKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 2, GetSliceHash);
num_hash_fun, 2, BytewiseComparator(), GetSliceHash);
ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
Expand All @@ -337,7 +337,7 @@ TEST(CuckooBuilderTest, WithCollisionPathUserKey) {
ASSERT_OK(writable_file->Close());

uint32_t expected_max_buckets = user_keys.size() / kHashTableRatio;
std::string expected_unused_bucket = "key06";
std::string expected_unused_bucket = "key00";
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(user_keys, values, expected_locations,
expected_unused_bucket, expected_max_buckets, 2, true);
Expand All @@ -362,7 +362,7 @@ TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
fname = test::TmpDir() + "/WithCollisionPathUserKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 2, GetSliceHash);
num_hash_fun, 2, BytewiseComparator(), GetSliceHash);
ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(GetInternalKey(user_keys[i], false)), Slice("value"));
Expand All @@ -382,7 +382,7 @@ TEST(CuckooBuilderTest, FailWhenSameKeyInserted) {
fname = test::TmpDir() + "/FailWhenSameKeyInserted";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 100, GetSliceHash);
num_hash_fun, 100, BytewiseComparator(), GetSliceHash);
ASSERT_OK(builder.status());

builder.Add(Slice(GetInternalKey(user_key, false)), Slice("value1"));
Expand Down
5 changes: 3 additions & 2 deletions table/cuckoo_table_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ Status CuckooTableFactory::NewTableReader(const Options& options,
std::unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
std::unique_ptr<TableReader>* table) const {
std::unique_ptr<CuckooTableReader> new_reader(new CuckooTableReader(options,
std::move(file), file_size, GetSliceMurmurHash));
std::move(file), file_size, icomp.user_comparator(), GetSliceMurmurHash));
Status s = new_reader->status();
if (s.ok()) {
*table = std::move(new_reader);
Expand All @@ -48,7 +48,8 @@ TableBuilder* CuckooTableFactory::NewTableBuilder(
const Options& options, const InternalKeyComparator& internal_comparator,
WritableFile* file, CompressionType compression_type) const {
return new CuckooTableBuilder(file, hash_table_ratio_, kMaxNumHashTable,
max_search_depth_, GetSliceMurmurHash);
max_search_depth_, internal_comparator.user_comparator(),
GetSliceMurmurHash);
}

std::string CuckooTableFactory::GetPrintableTableOptions() const {
Expand Down
1 change: 0 additions & 1 deletion table/cuckoo_table_factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ extern uint64_t GetSliceMurmurHash(const Slice& s, uint32_t index,
// - Key length and Value length are fixed.
// - Does not support Snapshot.
// - Does not support Merge operations.
// - Only supports Bytewise comparators.
class CuckooTableFactory : public TableFactory {
public:
CuckooTableFactory(double hash_table_ratio, uint32_t max_search_depth)
Expand Down

0 comments on commit 4142a3e

Please sign in to comment.