# pageserver: change pitr_interval=0 behavior (#7423)
## Problem

We already made a change in #6407 to make pitr_interval authoritative
for synthetic size calculations (i.e. do not charge users for data retained
due to gc_horizon), but that change didn't cover the case where someone
disables time-based retention entirely by setting pitr_interval=0.

Relates to: #6374

## Summary of changes

When pitr_interval is zero, do not set `pitr_cutoff` based on
gc_horizon.

gc_horizon is still enforced, but separately: its value is passed
separately, and there was never a need to clamp `pitr_cutoff` to gc_horizon.
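
As a rough sketch of the intended semantics (the names, signature, and `Lsn` stand-in below are invented for illustration, not the pageserver's actual API), the two cutoffs are now computed independently:

```rust
/// Minimal stand-in for the pageserver's LSN type.
#[derive(Clone, Copy, Debug, PartialEq)]
struct Lsn(u64);

/// Sketch of the post-PR semantics: the space-based and time-based cutoffs
/// are computed independently. `pitr_window_lsn` is the LSN that covers
/// pitr_interval of history, or None when pitr_interval is zero.
fn cutoffs(last_record: Lsn, gc_horizon: u64, pitr_window_lsn: Option<Lsn>) -> (Lsn, Lsn) {
    // gc_horizon is enforced on its own, saturating for short timelines.
    let horizon_cutoff = Lsn(last_record.0.saturating_sub(gc_horizon));
    // pitr_interval=0 means "keep no history": the cutoff is the end of the
    // WAL, with no clamping to gc_horizon.
    let pitr_cutoff = pitr_window_lsn.unwrap_or(last_record);
    (horizon_cutoff, pitr_cutoff)
}

fn main() {
    let (horizon_cutoff, pitr_cutoff) = cutoffs(Lsn(0x10000), 0x4000, None);
    assert_eq!(horizon_cutoff, Lsn(0xC000));
    assert_eq!(pitr_cutoff, Lsn(0x10000));
}
```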

## More detail

### Issue 1
Before this PR, we would skip the update_gc_info() call for timelines with
last_record_lsn() < gc_horizon.
Let's call such timelines "tiny".

The rationale presumably was: we can't GC anything in a tiny timeline,
so why bother calling update_gc_info().

However, synthetic size calculation relies on up-to-date
update_gc_info() data.

So, before this PR, tiny timelines never got an updated
GcInfo::pitr_horizon (it remained Lsn(0)), even on projects with
pitr_interval=0d.

With this PR, update_gc_info() is always called, hence
GcInfo::pitr_horizon is always updated, providing the synthetic size
calculation with up-to-date data. A sketch of why the stale value matters
follows below.
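
The types below are invented, simplified stand-ins (not the pageserver's real `GcInfo`), just to show how a never-updated `pitr_horizon` of `Lsn(0)` inflates a size model that retains history down to that horizon:

```rust
// Invented, simplified types for illustration; not the pageserver's real GcInfo.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
struct Lsn(u64);

#[derive(Default)]
struct GcInfo {
    /// Stays at the default Lsn(0) if update_gc_info() is never called.
    pitr_horizon: Lsn,
}

fn main() {
    let stale = GcInfo::default();
    let last_record_lsn = Lsn(0x2000); // a "tiny" timeline, shorter than gc_horizon

    // A size model that retains history down to pitr_horizon reads the stale
    // Lsn(0) as "keep everything since the start of the timeline", so the
    // whole WAL is charged even though pitr_interval=0.
    assert_eq!(last_record_lsn.0 - stale.pitr_horizon.0, 0x2000);

    // With update_gc_info() always called, pitr_interval=0 yields
    // pitr_horizon = last_record_lsn, and no history is charged.
    let fresh = GcInfo { pitr_horizon: last_record_lsn };
    assert_eq!(last_record_lsn.0 - fresh.pitr_horizon.0, 0);
}
```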

### Issue 2
Before this PR, regardless of whether the timeline was "tiny",
GcInfo::pitr_horizon was clamped to at least last_record_lsn -
gc_horizon, even if the PITR window, expressed as an LSN range, was
shorter than gc_horizon.

With this PR, that clamping is removed, so for pitr_interval=0,
pitr_horizon = last_record_lsn.
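
A worked example with invented numbers:

```rust
fn main() {
    // Invented numbers for illustration.
    let last_record_lsn: u64 = 0x10000;
    let gc_horizon: u64 = 0x4000;

    // Before this PR: with pitr_interval=0 the time-based cutoff fell back to
    // the LSN-based one, so gc_horizon worth of WAL stayed "retained by PITR".
    let old_pitr_horizon = last_record_lsn - gc_horizon;
    assert_eq!(old_pitr_horizon, 0xC000);

    // After this PR: pitr_interval=0 means the PITR window is empty.
    let new_pitr_horizon = last_record_lsn;
    assert_eq!(new_pitr_horizon, 0x10000);
    assert_eq!(last_record_lsn - new_pitr_horizon, 0); // nothing charged to PITR
}
```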
jcsp committed Apr 23, 2024 (1 parent e22c072, commit ee9ec26)
Showing 3 changed files with 30 additions and 75 deletions.
### pageserver/src/tenant.rs (16 additions, 13 deletions)
```diff
@@ -2870,20 +2870,23 @@ impl Tenant {
             }
         }

-            if let Some(cutoff) = timeline.get_last_record_lsn().checked_sub(horizon) {
-                let branchpoints: Vec<Lsn> = all_branchpoints
-                    .range((
-                        Included((timeline_id, Lsn(0))),
-                        Included((timeline_id, Lsn(u64::MAX))),
-                    ))
-                    .map(|&x| x.1)
-                    .collect();
-                timeline
-                    .update_gc_info(branchpoints, cutoff, pitr, cancel, ctx)
-                    .await?;
+            let cutoff = timeline
+                .get_last_record_lsn()
+                .checked_sub(horizon)
+                .unwrap_or(Lsn(0));
+
+            let branchpoints: Vec<Lsn> = all_branchpoints
+                .range((
+                    Included((timeline_id, Lsn(0))),
+                    Included((timeline_id, Lsn(u64::MAX))),
+                ))
+                .map(|&x| x.1)
+                .collect();
+            timeline
+                .update_gc_info(branchpoints, cutoff, pitr, cancel, ctx)
+                .await?;

-                gc_timelines.push(timeline);
-            }
+            gc_timelines.push(timeline);
         }
         drop(gc_cs);
         Ok(gc_timelines)
```
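
The notable mechanical change in this hunk: an underflowing `checked_sub` no longer skips the timeline; it saturates to `Lsn(0)` and `update_gc_info` runs anyway. A standalone sketch of the two behaviors on a "tiny" timeline (plain u64 stands in for `Lsn`):

```rust
fn main() {
    // A "tiny" timeline: less WAL written than gc_horizon.
    let last_record_lsn: u64 = 0x2000;
    let gc_horizon: u64 = 0x4000;

    // Before: the `if let Some(cutoff) = ...` pattern skipped update_gc_info
    // entirely, because the subtraction underflows on tiny timelines.
    assert!(last_record_lsn.checked_sub(gc_horizon).is_none());

    // After: underflow falls back to 0, and update_gc_info always runs.
    let cutoff = last_record_lsn.checked_sub(gc_horizon).unwrap_or(0);
    assert_eq!(cutoff, 0);
}
```
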
### pageserver/src/tenant/timeline.rs (2 additions, 3 deletions)
```diff
@@ -4244,9 +4244,8 @@ impl Timeline {
                 *self.get_latest_gc_cutoff_lsn()
             }
         } else {
-            // No time-based retention was configured. Set time-based cutoff to
-            // same as LSN based.
-            cutoff_horizon
+            // No time-based retention was configured. Interpret this as "keep no history".
+            self.get_last_record_lsn()
        };

         // Grab the lock and update the values
```
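
Note how the two cutoffs interact downstream. This is a sketch of the general rule, not a quote of the pageserver's code: GC must honor both retention settings, so the effective cutoff is the more conservative (lower) of the two, which is why gc_horizon remains enforced even though pitr_cutoff no longer mentions it:

```rust
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
struct Lsn(u64);

fn main() {
    // With pitr_interval=0 the time-based cutoff is the end of the WAL...
    let pitr_cutoff = Lsn(0x10000);
    // ...but gc_horizon still holds the space-based cutoff back:
    let horizon_cutoff = Lsn(0xC000);

    // GC keeps history down to the lower of the two cutoffs, so gc_horizon
    // still protects recent WAL even when the PITR window is empty.
    let effective_cutoff = std::cmp::min(horizon_cutoff, pitr_cutoff);
    assert_eq!(effective_cutoff, Lsn(0xC000));
}
```
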
### test_runner/regress/test_tenant_size.py (12 additions, 59 deletions)
```diff
@@ -292,33 +292,12 @@ def test_single_branch_get_tenant_size_grows(
     Operate on single branch reading the tenants size after each transaction.
     """

-    # Disable automatic gc and compaction.
-    # The pitr_interval here is quite problematic, so we cannot really use it.
-    # it'd have to be calibrated per test executing env.
-
-    # there was a bug which was hidden if the create table and first batch of
-    # inserts is larger than gc_horizon. for example 0x20000 here hid the fact
-    # that there next_gc_cutoff could be smaller than initdb_lsn, which will
-    # obviously lead to issues when calculating the size.
-    gc_horizon = 0x3BA00
-
-    # it's a bit of a hack, but different versions of postgres have different
-    # amount of WAL generated for the same amount of data. so we need to
-    # adjust the gc_horizon accordingly.
-    if pg_version == PgVersion.V14:
-        gc_horizon = 0x4A000
-    elif pg_version == PgVersion.V15:
-        gc_horizon = 0x3BA00
-    elif pg_version == PgVersion.V16:
-        gc_horizon = 210000
-    else:
-        raise NotImplementedError(pg_version)
-
+    # Disable automatic compaction and GC, and set a long PITR interval: we will expect
+    # size to always increase with writes as all writes remain within the PITR
     tenant_config = {
         "compaction_period": "0s",
         "gc_period": "0s",
-        "pitr_interval": "0s",
-        "gc_horizon": gc_horizon,
+        "pitr_interval": "3600s",
     }

     env = neon_env_builder.init_start(initial_tenant_conf=tenant_config)
@@ -332,18 +311,6 @@

     size_debug_file = open(test_output_dir / "size_debug.html", "w")

-    def check_size_change(
-        current_lsn: Lsn, initdb_lsn: Lsn, gc_horizon: int, size: int, prev_size: int
-    ):
-        if current_lsn - initdb_lsn >= gc_horizon:
-            assert (
-                size >= prev_size
-            ), f"tenant_size may grow or not grow, because we only add gc_horizon amount of WAL to initial snapshot size (Currently at: {current_lsn}, Init at: {initdb_lsn})"
-        else:
-            assert (
-                size > prev_size
-            ), f"tenant_size should grow, because we continue to add WAL to initial snapshot size (Currently at: {current_lsn}, Init at: {initdb_lsn})"
-
     def get_current_consistent_size(
         env: NeonEnv,
         endpoint: Endpoint,
@@ -412,14 +379,6 @@ def get_current_consistent_size(
            )

            prev_size = collected_responses[-1][2]
-
-            # branch start shouldn't be past gc_horizon yet
-            # thus the size should grow as we insert more data
-            # "gc_horizon" is tuned so that it kicks in _after_ the
-            # insert phase, but before the update phase ends.
-            assert (
-                current_lsn - initdb_lsn <= gc_horizon
-            ), "Tuning of GC window is likely out-of-date"
            assert size > prev_size

            collected_responses.append(("INSERT", current_lsn, size))
@@ -439,8 +398,7 @@ def get_current_consistent_size(
            )

            prev_size = collected_responses[-1][2]
-
-            check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size)
+            assert size > prev_size

            collected_responses.append(("UPDATE", current_lsn, size))
@@ -457,8 +415,7 @@ def get_current_consistent_size(
            )

            prev_size = collected_responses[-1][2]
-
-            check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size)
+            assert size > prev_size

            collected_responses.append(("DELETE", current_lsn, size))
@@ -469,20 +426 @@ def get_current_consistent_size(
    with endpoint.cursor() as cur:
        cur.execute("DROP TABLE t0")

-    # Without setting a PITR interval, dropping the table doesn't reclaim any space
-    # from the user's point of view, because the DROP transaction is too small
-    # to fall out of gc_horizon.
+    # Dropping the table doesn't reclaim any space
+    # from the user's point of view, because the DROP transaction is still
+    # within pitr_interval.
    (current_lsn, size) = get_current_consistent_size(
        env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
    )
    prev_size = collected_responses[-1][2]
-
-    check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size)
+    assert size >= prev_size
+    prev_size = size

-    # Set a tiny PITR interval to allow the DROP to impact the synthetic size
+    # Set a zero PITR interval to allow the DROP to impact the synthetic size
    # Because synthetic size calculation uses pitr interval when available,
    # when our tenant is configured with a tiny pitr interval, dropping a table should
    # cause synthetic size to go down immediately
-    tenant_config["pitr_interval"] = "1ms"
+    tenant_config["pitr_interval"] = "0s"
    env.pageserver.http_client().set_tenant_config(tenant_id, tenant_config)
    (current_lsn, size) = get_current_consistent_size(
        env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
@@ -494,10 +451,6 @@ def get_current_consistent_size(
    # defined by gc_horizon.
    collected_responses.append(("DROP", current_lsn, size))

-    # Should have gone past gc_horizon, otherwise gc_horizon is too large
-    bytes_written = current_lsn - initdb_lsn
-    assert bytes_written > gc_horizon
-
    # this isn't too many lines to forget for a while. observed while
    # developing these tests that locally the value is a bit more than what we
    # get in the ci.
```

1 comment on commit ee9ec26

@github-actions

2846 tests run: 2712 passed, 0 failed, 134 skipped (full report)


Flaky tests (1)

Postgres 16

  • test_vm_bit_clear_on_heap_lock: debug

Code coverage* (full report)

  • functions: 28.1% (6478 of 23035 functions)
  • lines: 47.0% (46008 of 97939 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
ee9ec26 at 2024-04-23T17:10:37.666Z :recycle:
