Skip to content

Commit

Permalink
sstables: Fix update of tombstone GC settings to have immediate effect
Browse files Browse the repository at this point in the history
After "repair: Get rid of the gc_grace_seconds", the sstable's schema (mode,
gc period if applicable, etc) is used to estimate the amount of droppable
data (or determine full expiration = max_deletion_time < gc_before).
It could happen that the user switched from timeout to repair mode, but
sstables will still use the old mode, despite the user asked for a new one.
Another example is when you play with value of grace period, to prevent
data resurrection if repair won't be able to run in a timely manner.
The problem persists until all sstables using old GC mode are recompacted
or node is restarted.
To fix this, we have to feed latest schema into sstable procedures used
for expiration purposes.

Fixes scylladb#15643.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
  • Loading branch information
raphaelsc committed Oct 17, 2023
1 parent 031ff75 commit ce60009
Show file tree
Hide file tree
Showing 8 changed files with 14 additions and 16 deletions.
4 changes: 2 additions & 2 deletions compaction/compaction.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1800,7 +1800,7 @@ get_fully_expired_sstables(const table_state& table_s, const std::vector<sstable
int64_t min_timestamp = std::numeric_limits<int64_t>::max();

for (auto& sstable : overlapping) {
auto gc_before = sstable->get_gc_before_for_fully_expire(compaction_time, table_s.get_tombstone_gc_state());
auto gc_before = sstable->get_gc_before_for_fully_expire(compaction_time, table_s.get_tombstone_gc_state(), table_s.schema());
if (sstable->get_max_local_deletion_time() >= gc_before) {
min_timestamp = std::min(min_timestamp, sstable->get_stats_metadata().min_timestamp);
}
Expand All @@ -1819,7 +1819,7 @@ get_fully_expired_sstables(const table_state& table_s, const std::vector<sstable

// SStables that do not contain live data is added to list of possibly expired sstables.
for (auto& candidate : compacting) {
auto gc_before = candidate->get_gc_before_for_fully_expire(compaction_time, table_s.get_tombstone_gc_state());
auto gc_before = candidate->get_gc_before_for_fully_expire(compaction_time, table_s.get_tombstone_gc_state(), table_s.schema());
clogger.debug("Checking if candidate of generation {} and max_deletion_time {} is expired, gc_before is {}",
candidate->generation(), candidate->get_stats_metadata().max_local_deletion_time, gc_before);
// A fully expired sstable which has an ancestor undeleted shouldn't be compacted because
Expand Down
4 changes: 2 additions & 2 deletions compaction/compaction_strategy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ std::vector<compaction_descriptor> compaction_strategy_impl::get_cleanup_compact
}));
}

bool compaction_strategy_impl::worth_dropping_tombstones(const shared_sstable& sst, gc_clock::time_point compaction_time, const tombstone_gc_state& gc_state) {
bool compaction_strategy_impl::worth_dropping_tombstones(const shared_sstable& sst, gc_clock::time_point compaction_time, const table_state& t) {
if (_disable_tombstone_compaction) {
return false;
}
Expand All @@ -62,7 +62,7 @@ bool compaction_strategy_impl::worth_dropping_tombstones(const shared_sstable& s
if (db_clock::now()-_tombstone_compaction_interval < sst->data_file_write_time()) {
return false;
}
auto gc_before = sst->get_gc_before_for_drop_estimation(compaction_time, gc_state);
auto gc_before = sst->get_gc_before_for_drop_estimation(compaction_time, t.get_tombstone_gc_state(), t.schema());
return sst->estimate_droppable_tombstone_ratio(gc_before) >= _tombstone_threshold;
}

Expand Down
2 changes: 1 addition & 1 deletion compaction/compaction_strategy_impl.hh
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public:

// Check if a given sstable is entitled for tombstone compaction based on its
// droppable tombstone histogram and gc_before.
bool worth_dropping_tombstones(const shared_sstable& sst, gc_clock::time_point compaction_time, const tombstone_gc_state& gc_state);
bool worth_dropping_tombstones(const shared_sstable& sst, gc_clock::time_point compaction_time, const table_state& t);

virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() const = 0;

Expand Down
6 changes: 3 additions & 3 deletions compaction/leveled_compaction_strategy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,15 @@ compaction_descriptor leveled_compaction_strategy::get_sstables_for_compaction(t
auto& sstables = manifest.get_level(level);
// filter out sstables which droppable tombstone ratio isn't greater than the defined threshold.
auto e = boost::range::remove_if(sstables, [this, compaction_time, &table_s] (const sstables::shared_sstable& sst) -> bool {
return !worth_dropping_tombstones(sst, compaction_time, table_s.get_tombstone_gc_state());
return !worth_dropping_tombstones(sst, compaction_time, table_s);
});
sstables.erase(e, sstables.end());
if (sstables.empty()) {
continue;
}
auto& sst = *std::max_element(sstables.begin(), sstables.end(), [&] (auto& i, auto& j) {
auto gc_before1 = i->get_gc_before_for_drop_estimation(compaction_time, table_s.get_tombstone_gc_state());
auto gc_before2 = j->get_gc_before_for_drop_estimation(compaction_time, table_s.get_tombstone_gc_state());
auto gc_before1 = i->get_gc_before_for_drop_estimation(compaction_time, table_s.get_tombstone_gc_state(), table_s.schema());
auto gc_before2 = j->get_gc_before_for_drop_estimation(compaction_time, table_s.get_tombstone_gc_state(), table_s.schema());
return i->estimate_droppable_tombstone_ratio(gc_before1) < j->estimate_droppable_tombstone_ratio(gc_before2);
});
return sstables::compaction_descriptor({ sst }, sst->get_sstable_level());
Expand Down
2 changes: 1 addition & 1 deletion compaction/size_tiered_compaction_strategy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ size_tiered_compaction_strategy::get_sstables_for_compaction(table_state& table_
for (auto&& sstables : buckets | boost::adaptors::reversed) {
// filter out sstables which droppable tombstone ratio isn't greater than the defined threshold.
auto e = boost::range::remove_if(sstables, [this, compaction_time, &table_s] (const sstables::shared_sstable& sst) -> bool {
return !worth_dropping_tombstones(sst, compaction_time, table_s.get_tombstone_gc_state());
return !worth_dropping_tombstones(sst, compaction_time, table_s);
});
sstables.erase(e, sstables.end());
if (sstables.empty()) {
Expand Down
2 changes: 1 addition & 1 deletion compaction/time_window_compaction_strategy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ time_window_compaction_strategy::get_next_non_expired_sstables(table_state& tabl
// if there is no sstable to compact in standard way, try compacting single sstable whose droppable tombstone
// ratio is greater than threshold.
auto e = boost::range::remove_if(non_expiring_sstables, [this, compaction_time, &table_s] (const shared_sstable& sst) -> bool {
return !worth_dropping_tombstones(sst, compaction_time, table_s.get_tombstone_gc_state());
return !worth_dropping_tombstones(sst, compaction_time, table_s);
});
non_expiring_sstables.erase(e, non_expiring_sstables.end());
if (non_expiring_sstables.empty()) {
Expand Down
6 changes: 2 additions & 4 deletions sstables/sstables.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3033,8 +3033,7 @@ std::optional<large_data_stats_entry> sstable::get_large_data_stat(large_data_ty
// gc_before for all the partitions that have record in repair history map. It
// is fine that some of the partitions inside the sstable does not have a
// record.
gc_clock::time_point sstable::get_gc_before_for_drop_estimation(const gc_clock::time_point& compaction_time, const tombstone_gc_state& gc_state) const {
auto s = get_schema();
gc_clock::time_point sstable::get_gc_before_for_drop_estimation(const gc_clock::time_point& compaction_time, const tombstone_gc_state& gc_state, const schema_ptr& s) const {
auto start = get_first_decorated_key().token();
auto end = get_last_decorated_key().token();
auto range = dht::token_range(dht::token_range::bound(start, true), dht::token_range::bound(end, true));
Expand All @@ -3050,9 +3049,8 @@ gc_clock::time_point sstable::get_gc_before_for_drop_estimation(const gc_clock::
// in the repair history map, we can not drop the sstable, in such case we
// return gc_clock::time_point::min() as gc_before. Otherwise, return the
// gc_before from the repair history map.
gc_clock::time_point sstable::get_gc_before_for_fully_expire(const gc_clock::time_point& compaction_time, const tombstone_gc_state& gc_state) const {
gc_clock::time_point sstable::get_gc_before_for_fully_expire(const gc_clock::time_point& compaction_time, const tombstone_gc_state& gc_state, const schema_ptr& s) const {
auto deletion_time = get_max_local_deletion_time();
auto s = get_schema();
// No need to query gc_before for the sstable if the max_deletion_time is max()
if (deletion_time == gc_clock::time_point(gc_clock::duration(std::numeric_limits<int>::max()))) {
sstlog.trace("sstable={}, ks={}, cf={}, get_max_local_deletion_time={}, min_timestamp={}, gc_grace_seconds={}, shortcut",
Expand Down
4 changes: 2 additions & 2 deletions sstables/sstables.hh
Original file line number Diff line number Diff line change
Expand Up @@ -935,8 +935,8 @@ public:
friend std::unique_ptr<DataConsumeRowsContext>
data_consume_rows(const schema&, shared_sstable, typename DataConsumeRowsContext::consumer&);
friend void lw_shared_ptr_deleter<sstables::sstable>::dispose(sstable* s);
gc_clock::time_point get_gc_before_for_drop_estimation(const gc_clock::time_point& compaction_time, const tombstone_gc_state& gc_state) const;
gc_clock::time_point get_gc_before_for_fully_expire(const gc_clock::time_point& compaction_time, const tombstone_gc_state& gc_state) const;
gc_clock::time_point get_gc_before_for_drop_estimation(const gc_clock::time_point& compaction_time, const tombstone_gc_state& gc_state, const schema_ptr& s) const;
gc_clock::time_point get_gc_before_for_fully_expire(const gc_clock::time_point& compaction_time, const tombstone_gc_state& gc_state, const schema_ptr& s) const;

future<uint32_t> read_digest();
future<checksum> read_checksum();
Expand Down

0 comments on commit ce60009

Please sign in to comment.