Skip to content

Commit

Permalink
logical size: better represent level of accuracy in the type system (#…
Browse files Browse the repository at this point in the history
…5999)

I would love to not expose the inaccurate value in the mgmt API at all,
and in fact control plane doesn't use it [^1].
But our tests do, and I have no desire to change them at this time.

[^1]: neondatabase/cloud#8317
  • Loading branch information
problame committed Dec 1, 2023
1 parent 805df0f commit 9275824
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 39 deletions.
4 changes: 3 additions & 1 deletion libs/pageserver_api/src/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,9 @@ pub struct TimelineInfo {
/// The LSN that we are advertizing to safekeepers
pub remote_consistent_lsn_visible: Lsn,

pub current_logical_size: Option<u64>, // is None when timeline is Unloaded
pub current_logical_size: u64,
pub current_logical_size_is_accurate: bool,

/// Sum of the size of all layer files.
/// If a layer is present in both local FS and S3, it counts only once.
pub current_physical_size: Option<u64>, // is None when timeline is Unloaded
Expand Down
13 changes: 5 additions & 8 deletions pageserver/src/consumption_metrics/metrics.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use crate::context::RequestContext;
use anyhow::Context;
use crate::{context::RequestContext, tenant::timeline::logical_size::CurrentLogicalSize};
use chrono::{DateTime, Utc};
use consumption_metrics::EventType;
use futures::stream::StreamExt;
Expand Down Expand Up @@ -352,13 +351,11 @@ impl TimelineSnapshot {

let current_exact_logical_size = {
let span = tracing::info_span!("collect_metrics_iteration", tenant_id = %t.tenant_shard_id.tenant_id, timeline_id = %t.timeline_id);
let res = span
.in_scope(|| t.get_current_logical_size(ctx))
.context("get_current_logical_size");
match res? {
let size = span.in_scope(|| t.get_current_logical_size(ctx));
match size {
// Only send timeline logical size when it is fully calculated.
(size, is_exact) if is_exact => Some(size),
(_, _) => None,
CurrentLogicalSize::Exact(ref size) => Some(size.into()),
CurrentLogicalSize::Approximate(_) => None,
}
};

Expand Down
14 changes: 6 additions & 8 deletions pageserver/src/http/routes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -338,13 +338,7 @@ async fn build_timeline_info_common(
Lsn(0) => None,
lsn @ Lsn(_) => Some(lsn),
};
let current_logical_size = match timeline.get_current_logical_size(ctx) {
Ok((size, _)) => Some(size),
Err(err) => {
error!("Timeline info creation failed to get current logical size: {err:?}");
None
}
};
let current_logical_size = timeline.get_current_logical_size(ctx);
let current_physical_size = Some(timeline.layer_size_sum().await);
let state = timeline.current_state();
let remote_consistent_lsn_projected = timeline
Expand All @@ -368,7 +362,11 @@ async fn build_timeline_info_common(
last_record_lsn,
prev_record_lsn: Some(timeline.get_prev_record_lsn()),
latest_gc_cutoff_lsn: *timeline.get_latest_gc_cutoff_lsn(),
current_logical_size,
current_logical_size: current_logical_size.size_dont_care_about_accuracy(),
current_logical_size_is_accurate: match current_logical_size.accuracy() {
tenant::timeline::logical_size::Accuracy::Approximate => false,
tenant::timeline::logical_size::Accuracy::Exact => true,
},
current_physical_size,
current_logical_size_non_incremental: None,
timeline_dir_layer_file_size_sum: None,
Expand Down
15 changes: 6 additions & 9 deletions pageserver/src/tenant/timeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ pub mod delete;
mod eviction_task;
mod init;
pub mod layer_manager;
mod logical_size;
pub(crate) mod logical_size;
pub mod span;
pub mod uninit;
mod walreceiver;
Expand Down Expand Up @@ -855,23 +855,20 @@ impl Timeline {
/// the initial size calculation has not been run (gets triggered on the first size access).
///
/// return size and boolean flag that shows if the size is exact
pub fn get_current_logical_size(
pub(crate) fn get_current_logical_size(
self: &Arc<Self>,
ctx: &RequestContext,
) -> anyhow::Result<(u64, bool)> {
) -> logical_size::CurrentLogicalSize {
let current_size = self.current_logical_size.current_size();
debug!("Current size: {current_size:?}");

let mut is_exact = true;
let size = current_size.size();
if let (CurrentLogicalSize::Approximate(_), Some(initial_part_end)) =
(current_size, self.current_logical_size.initial_part_end)
{
is_exact = false;
self.try_spawn_size_init_task(initial_part_end, ctx);
}

Ok((size, is_exact))
current_size
}

/// Check if more than 'checkpoint_distance' of WAL has been accumulated in
Expand Down Expand Up @@ -2057,10 +2054,10 @@ impl Timeline {
// one value while current_logical_size is set to the
// other.
match logical_size.current_size() {
CurrentLogicalSize::Exact(new_current_size) => self
CurrentLogicalSize::Exact(ref new_current_size) => self
.metrics
.current_logical_size_gauge
.set(new_current_size),
.set(new_current_size.into()),
CurrentLogicalSize::Approximate(_) => {
// don't update the gauge yet, this allows us not to update the gauge back and
// forth between the initial size calculation task.
Expand Down
49 changes: 39 additions & 10 deletions pageserver/src/tenant/timeline/logical_size.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,21 +59,50 @@ pub(super) struct LogicalSize {

/// Normalized current size, that the data in pageserver occupies.
#[derive(Debug, Clone, Copy)]
pub(super) enum CurrentLogicalSize {
pub(crate) enum CurrentLogicalSize {
/// The size is not yet calculated to the end, this is an intermediate result,
/// constructed from walreceiver increments and normalized: logical data could delete some objects, hence be negative,
/// yet total logical size cannot be below 0.
Approximate(u64),
Approximate(Approximate),
// Fully calculated logical size, only other future walreceiver increments are changing it, and those changes are
// available for observation without any calculations.
Exact(u64),
Exact(Exact),
}

/// Payload-free tag saying whether a logical size value is exact or only
/// approximate.
///
/// Mirrors the two variants of `CurrentLogicalSize` without carrying the
/// size itself.
#[derive(Debug, Clone, Copy)]
pub(crate) enum Accuracy {
    /// The size is an intermediate result, not yet calculated to the end.
    Approximate,
    /// The size is fully calculated.
    Exact,
}

/// Size payload of `CurrentLogicalSize::Approximate`.
///
/// The inner `u64` is private, so the raw number can only be read outside this
/// module through the explicit `u64::from(&value)` conversion below.
#[derive(Debug, Clone, Copy)]
pub(crate) struct Approximate(u64);

/// Size payload of `CurrentLogicalSize::Exact`.
///
/// The inner `u64` is private, so the raw number can only be read outside this
/// module through the explicit `u64::from(&value)` conversion below.
#[derive(Debug, Clone, Copy)]
pub(crate) struct Exact(u64);

/// Unwraps the raw size from a borrowed [`Approximate`].
impl From<&Approximate> for u64 {
    fn from(&Approximate(size): &Approximate) -> Self {
        size
    }
}

/// Unwraps the raw size from a borrowed [`Exact`].
impl From<&Exact> for u64 {
    fn from(&Exact(size): &Exact) -> Self {
        size
    }
}

impl CurrentLogicalSize {
pub(super) fn size(&self) -> u64 {
*match self {
Self::Approximate(size) => size,
Self::Exact(size) => size,
pub(crate) fn size_dont_care_about_accuracy(&self) -> u64 {
match self {
Self::Approximate(size) => size.into(),
Self::Exact(size) => size.into(),
}
}
pub(crate) fn accuracy(&self) -> Accuracy {
match self {
Self::Approximate(_) => Accuracy::Approximate,
Self::Exact(_) => Accuracy::Exact,
}
}
}
Expand Down Expand Up @@ -109,16 +138,16 @@ impl LogicalSize {
match self.initial_logical_size.get() {
Some((initial_size, _)) => {
crate::metrics::initial_logical_size::CALLS.exact.inc();
CurrentLogicalSize::Exact(initial_size.checked_add_signed(size_increment)
CurrentLogicalSize::Exact(Exact(initial_size.checked_add_signed(size_increment)
.with_context(|| format!("Overflow during logical size calculation, initial_size: {initial_size}, size_increment: {size_increment}"))
.unwrap())
.unwrap()))
}
None => {
crate::metrics::initial_logical_size::CALLS
.approximate
.inc();
let non_negative_size_increment = u64::try_from(size_increment).unwrap_or(0);
CurrentLogicalSize::Approximate(non_negative_size_increment)
CurrentLogicalSize::Approximate(Approximate(non_negative_size_increment))
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -396,11 +396,12 @@ pub(super) async fn handle_walreceiver_connection(

// Send the replication feedback message.
// Regular standby_status_update fields are put into this message.
let (timeline_logical_size, _) = timeline
let current_timeline_size = timeline
.get_current_logical_size(&ctx)
.context("Status update creation failed to get current logical size")?;
// FIXME: https://github.com/neondatabase/neon/issues/5963
.size_dont_care_about_accuracy();
let status_update = PageserverFeedback {
current_timeline_size: timeline_logical_size,
current_timeline_size,
last_received_lsn,
disk_consistent_lsn,
remote_consistent_lsn,
Expand Down

0 comments on commit 9275824

Please sign in to comment.