From ee9ec26808d71e441b7d0c96bf9a046ced831f88 Mon Sep 17 00:00:00 2001 From: John Spray Date: Tue, 23 Apr 2024 17:16:17 +0100 Subject: [PATCH] pageserver: change pitr_interval=0 behavior (#7423) ## Problem We already made a change in #6407 to make pitr_interval authoritative for synthetic size calculations (do not charge users for data retained due to gc_horizon), but that change didn't cover the case where someone entirely disables time-based retention by setting pitr_interval=0. Relates to: https://github.com/neondatabase/neon/issues/6374 ## Summary of changes When pitr_interval is zero, do not set `pitr_cutoff` based on gc_horizon. gc_horizon is still enforced, but separately (its value is passed separately; there was never a need to clamp pitr_cutoff to gc_horizon) ## More detail ### Issue 1 Before this PR, we would skip the update_gc_info for timelines with last_record_lsn() < gc_horizon. Let's call such timelines "tiny". The rationale for that presumably was that we can't GC anything in the tiny timelines, so why bother to call update_gc_info(). However, synthetic size calculation relies on up-to-date update_gc_info() data. Before this PR, tiny timelines would never get an updated GcInfo::pitr_horizon (it remained Lsn(0)), even on projects with pitr_interval=0d. With this PR, update_gc_info is always called, hence GcInfo::pitr_horizon is always updated, thereby providing the synthetic size calculation with up-to-date data. ### Issue 2 Before this PR, regardless of whether the timeline is "tiny" or not, GcInfo::pitr_horizon was clamped to at least last_record_lsn - gc_horizon, even if the pitr window in terms of LSN range was shorter (i.e., less than) the gc_horizon. With this PR, that clamping is removed, so, for pitr_interval=0, the pitr_horizon = last_record_lsn. 
--- pageserver/src/tenant.rs | 29 +++++----- pageserver/src/tenant/timeline.rs | 5 +- test_runner/regress/test_tenant_size.py | 71 +++++-------------------- 3 files changed, 30 insertions(+), 75 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 098bad71fbbd..15350e93e9c3 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -2870,20 +2870,23 @@ impl Tenant { } } - if let Some(cutoff) = timeline.get_last_record_lsn().checked_sub(horizon) { - let branchpoints: Vec = all_branchpoints - .range(( - Included((timeline_id, Lsn(0))), - Included((timeline_id, Lsn(u64::MAX))), - )) - .map(|&x| x.1) - .collect(); - timeline - .update_gc_info(branchpoints, cutoff, pitr, cancel, ctx) - .await?; + let cutoff = timeline + .get_last_record_lsn() + .checked_sub(horizon) + .unwrap_or(Lsn(0)); + + let branchpoints: Vec = all_branchpoints + .range(( + Included((timeline_id, Lsn(0))), + Included((timeline_id, Lsn(u64::MAX))), + )) + .map(|&x| x.1) + .collect(); + timeline + .update_gc_info(branchpoints, cutoff, pitr, cancel, ctx) + .await?; - gc_timelines.push(timeline); - } + gc_timelines.push(timeline); } drop(gc_cs); Ok(gc_timelines) diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 2fbe3c63a254..22b8a1787469 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -4244,9 +4244,8 @@ impl Timeline { *self.get_latest_gc_cutoff_lsn() } } else { - // No time-based retention was configured. Set time-based cutoff to - // same as LSN based. - cutoff_horizon + // No time-based retention was configured. Interpret this as "keep no history". 
+ self.get_last_record_lsn() }; // Grab the lock and update the values diff --git a/test_runner/regress/test_tenant_size.py b/test_runner/regress/test_tenant_size.py index 4c8fd4b0e5d7..a588f6ab534f 100644 --- a/test_runner/regress/test_tenant_size.py +++ b/test_runner/regress/test_tenant_size.py @@ -292,33 +292,12 @@ def test_single_branch_get_tenant_size_grows( Operate on single branch reading the tenants size after each transaction. """ - # Disable automatic gc and compaction. - # The pitr_interval here is quite problematic, so we cannot really use it. - # it'd have to be calibrated per test executing env. - - # there was a bug which was hidden if the create table and first batch of - # inserts is larger than gc_horizon. for example 0x20000 here hid the fact - # that there next_gc_cutoff could be smaller than initdb_lsn, which will - # obviously lead to issues when calculating the size. - gc_horizon = 0x3BA00 - - # it's a bit of a hack, but different versions of postgres have different - # amount of WAL generated for the same amount of data. so we need to - # adjust the gc_horizon accordingly. 
- if pg_version == PgVersion.V14: - gc_horizon = 0x4A000 - elif pg_version == PgVersion.V15: - gc_horizon = 0x3BA00 - elif pg_version == PgVersion.V16: - gc_horizon = 210000 - else: - raise NotImplementedError(pg_version) - + # Disable automatic compaction and GC, and set a long PITR interval: we will expect + # size to always increase with writes as all writes remain within the PITR tenant_config = { "compaction_period": "0s", "gc_period": "0s", - "pitr_interval": "0s", - "gc_horizon": gc_horizon, + "pitr_interval": "3600s", } env = neon_env_builder.init_start(initial_tenant_conf=tenant_config) @@ -332,18 +311,6 @@ def test_single_branch_get_tenant_size_grows( size_debug_file = open(test_output_dir / "size_debug.html", "w") - def check_size_change( - current_lsn: Lsn, initdb_lsn: Lsn, gc_horizon: int, size: int, prev_size: int - ): - if current_lsn - initdb_lsn >= gc_horizon: - assert ( - size >= prev_size - ), f"tenant_size may grow or not grow, because we only add gc_horizon amount of WAL to initial snapshot size (Currently at: {current_lsn}, Init at: {initdb_lsn})" - else: - assert ( - size > prev_size - ), f"tenant_size should grow, because we continue to add WAL to initial snapshot size (Currently at: {current_lsn}, Init at: {initdb_lsn})" - def get_current_consistent_size( env: NeonEnv, endpoint: Endpoint, @@ -412,14 +379,6 @@ def get_current_consistent_size( ) prev_size = collected_responses[-1][2] - - # branch start shouldn't be past gc_horizon yet - # thus the size should grow as we insert more data - # "gc_horizon" is tuned so that it kicks in _after_ the - # insert phase, but before the update phase ends. 
- assert ( - current_lsn - initdb_lsn <= gc_horizon - ), "Tuning of GC window is likely out-of-date" assert size > prev_size collected_responses.append(("INSERT", current_lsn, size)) @@ -439,8 +398,7 @@ def get_current_consistent_size( ) prev_size = collected_responses[-1][2] - - check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size) + assert size > prev_size collected_responses.append(("UPDATE", current_lsn, size)) @@ -457,8 +415,7 @@ def get_current_consistent_size( ) prev_size = collected_responses[-1][2] - - check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size) + assert size > prev_size collected_responses.append(("DELETE", current_lsn, size)) @@ -469,20 +426,20 @@ def get_current_consistent_size( with endpoint.cursor() as cur: cur.execute("DROP TABLE t0") - # Without setting a PITR interval, dropping the table doesn't reclaim any space - # from the user's point of view, because the DROP transaction is too small - # to fall out of gc_horizon. + # Dropping the table doesn't reclaim any space + # from the user's point of view, because the DROP transaction is still + # within pitr_interval. 
(current_lsn, size) = get_current_consistent_size( env, endpoint, size_debug_file, http_client, tenant_id, timeline_id ) - prev_size = collected_responses[-1][2] - check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size) + assert size >= prev_size + prev_size = size - # Set a tiny PITR interval to allow the DROP to impact the synthetic size + # Set a zero PITR interval to allow the DROP to impact the synthetic size # Because synthetic size calculation uses pitr interval when available, # when our tenant is configured with a tiny pitr interval, dropping a table should # cause synthetic size to go down immediately - tenant_config["pitr_interval"] = "1ms" + tenant_config["pitr_interval"] = "0s" env.pageserver.http_client().set_tenant_config(tenant_id, tenant_config) (current_lsn, size) = get_current_consistent_size( env, endpoint, size_debug_file, http_client, tenant_id, timeline_id @@ -494,10 +451,6 @@ def get_current_consistent_size( # defined by gc_horizon. collected_responses.append(("DROP", current_lsn, size)) - # Should have gone past gc_horizon, otherwise gc_horizon is too large - bytes_written = current_lsn - initdb_lsn - assert bytes_written > gc_horizon - # this isn't too many lines to forget for a while. observed while # developing these tests that locally the value is a bit more than what we # get in the ci.