Skip to content

Commit

Permalink
mvcc: Do not move unevictable snapshots to cache
Browse files Browse the repository at this point in the history
Commit 6ccd317 introduced a bug in partition_entry::evict() where a
partition entry may be partially evicted if there are non-evictable
snapshots in it. Partially evicting some of the versions may violate
consistency of a snapshot which includes evicted versions. For one,
continuity flags are interpreted realtive to the merged view, not
within a version, so evicting from some of the versions may mark
reanges as continuous when before they were discontinuous. Also, range
tombtsones of the snapshot are taken from all versions, so we can't
partially evict some of them without marking all affected ranges as
discontinuous.

The fix is to revert back to full eviciton, and avoid moving
non-evictable snapshots to cache. When moving whole partition entry to
cache, we first create a neutral empty partition entry and then merge
the memtable entry into it just like we would if the entry already
existed.

Fixes #3215.

Tests: unit (release)
Message-Id: <1518710592-21925-2-git-send-email-tgrabiec@scylladb.com>
  • Loading branch information
tgrabiec authored and pdziepak committed Feb 15, 2018
1 parent 1e218e2 commit b0b57b8
Show file tree
Hide file tree
Showing 5 changed files with 6 additions and 81 deletions.
25 changes: 1 addition & 24 deletions partition_version.cc
Expand Up @@ -207,19 +207,6 @@ partition_entry::partition_entry(partition_entry::evictable_tag, const schema& s
_version.make_evictable();
}

partition_entry::partition_entry(partition_entry::evictable_tag, const schema& s, partition_entry&& e)
: partition_entry([&] {
if (e._snapshot) {
// We must not change evictability of existing snapshots
// FIXME: https://github.com/scylladb/scylla/issues/1938
e.add_version(s);
}
return std::move(e);
}())
{
_version.make_evictable();
}

partition_entry partition_entry::make_evictable(const schema& s, mutation_partition&& mp) {
return {evictable_tag(), s, std::move(mp)};
}
Expand All @@ -228,14 +215,6 @@ partition_entry partition_entry::make_evictable(const schema& s, const mutation_
return make_evictable(s, mutation_partition(mp));
}

partition_entry partition_entry::make_evictable(const schema& s, partition_entry&& pe) {
// If we can assume that _pe is fully continuous, we don't need to check all versions
// to determine what the continuity is.
// This doesn't change value and doesn't invalidate iterators, so can be called even with a snapshot.
pe.version()->partition().ensure_last_dummy(s);
return partition_entry(evictable_tag(), s, std::move(pe));
}

partition_entry::~partition_entry() {
if (!_version) {
return;
Expand Down Expand Up @@ -574,10 +553,8 @@ void partition_entry::evict() noexcept {
if (!_version) {
return;
}
// Must evict from all versions atomically to keep snapshots consistent.
for (auto&& v : versions()) {
if (v.is_referenced() && !v.back_reference().evictable()) {
break;
}
v.partition().evict();
}
current_allocator().invalidate_references();
Expand Down
4 changes: 0 additions & 4 deletions partition_version.hh
Expand Up @@ -328,14 +328,10 @@ public:
// Constructs an evictable entry
// Strong exception guarantees for the state of mp.
partition_entry(evictable_tag, const schema& s, mutation_partition&& mp);
// Strong exception guarantees for the state of pe.
partition_entry(evictable_tag, const schema& s, partition_entry&& pe);
~partition_entry();

static partition_entry make_evictable(const schema& s, mutation_partition&& mp);
static partition_entry make_evictable(const schema& s, const mutation_partition& mp);
// pe must be a non-evictable fully continuous entry.
static partition_entry make_evictable(const schema& s, partition_entry&& pe);

partition_entry(partition_entry&& pe) noexcept
: _snapshot(pe._snapshot), _version(std::move(pe._version))
Expand Down
7 changes: 5 additions & 2 deletions row_cache.cc
Expand Up @@ -1010,11 +1010,14 @@ future<> row_cache::update(external_updater eu, memtable& m) {
_tracker.touch(entry);
_tracker.on_merge();
} else if (cache_i->continuous() || is_present(mem_e.key()) == partition_presence_checker_result::definitely_doesnt_exist) {
cache_entry* entry = current_allocator().construct<cache_entry>(
mem_e.schema(), std::move(mem_e.key()), std::move(mem_e.partition()));
// Partition is absent in underlying. First, insert a neutral partition entry.
cache_entry* entry = current_allocator().construct<cache_entry>(cache_entry::evictable_tag(),
_schema, dht::decorated_key(mem_e.key()),
partition_entry::make_evictable(*_schema, mutation_partition(_schema)));
entry->set_continuous(cache_i->continuous());
_tracker.insert(*entry);
_partitions.insert(cache_i, *entry);
entry->partition().apply_to_incomplete(*_schema, std::move(mem_e.partition()), *mem_e.schema(), _tracker.region());
}
});
}
Expand Down
6 changes: 0 additions & 6 deletions row_cache.hh
Expand Up @@ -110,12 +110,6 @@ public:
partition_entry::make_evictable(*s, std::move(p)))
{ }

// It is assumed that pe is fully continuous
cache_entry(schema_ptr s, dht::decorated_key&& key, partition_entry&& pe)
: cache_entry(evictable_tag(), s, std::move(key),
partition_entry::make_evictable(*s, std::move(pe)))
{ }

// It is assumed that pe is fully continuous
// pe must be evictable.
cache_entry(evictable_tag, schema_ptr s, dht::decorated_key&& key, partition_entry&& pe) noexcept
Expand Down
45 changes: 0 additions & 45 deletions tests/mvcc_test.cc
Expand Up @@ -662,48 +662,3 @@ SEASTAR_TEST_CASE(test_apply_is_atomic) {
do_test(random_mutation_generator(random_mutation_generator::generate_counters::yes));
return make_ready_future<>();
}


SEASTAR_TEST_CASE(test_eviction_with_mixed_snapshot_evictability) {
return seastar::async([] {
random_mutation_generator gen(random_mutation_generator::generate_counters::no);
auto s = gen.schema();

mutation m1 = gen();
mutation m2 = gen();
m1.partition().make_fully_continuous();
m2.partition().make_fully_continuous();

logalloc::region r;
with_allocator(r.allocator(), [&] {
logalloc::reclaim_lock l(r);

auto e = partition_entry(mutation_partition(s));

e.apply(*s, m1.partition(), *s);

auto snap1 = e.read(r, s); // non-evictable

e = partition_entry::make_evictable(*s, std::move(e));

{
partition_entry tmp(m2.partition());
e.apply_to_incomplete(*s, std::move(tmp), *m2.schema(), r);
}

auto snap2 = e.read(r, s); // evictable

e.evict();

assert_that(s, read_using_cursor(*snap1)).is_equal_to(m1.partition());

snap1 = {};
e.evict();

// Everything should be evicted now, since non-evictable snapshot is gone
auto pt = m1.partition().partition_tombstone() + m2.partition().partition_tombstone();
assert_that(s, read_using_cursor(*snap2))
.is_equal_to(mutation_partition::make_incomplete(*s, pt));
});
});
}

0 comments on commit b0b57b8

Please sign in to comment.