Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pageserver: Reduce tracing overhead in timeline::get #6115

Merged
Merged
150 changes: 78 additions & 72 deletions pageserver/src/tenant/storage_layer/layer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -259,8 +259,9 @@ impl Layer {

layer
.get_value_reconstruct_data(key, lsn_range, reconstruct_data, &self.0, ctx)
.instrument(tracing::info_span!("get_value_reconstruct_data", layer=%self))
.instrument(tracing::debug_span!("get_value_reconstruct_data", layer=%self))
ivaxer marked this conversation as resolved.
Show resolved Hide resolved
.await
.with_context(|| format!("get_value_reconstruct_data for layer {self}"))
}

/// Download the layer if evicted.
Expand Down Expand Up @@ -654,7 +655,6 @@ impl LayerInner {
}

/// Cancellation safe.
#[tracing::instrument(skip_all, fields(layer=%self))]
async fn get_or_maybe_download(
self: &Arc<Self>,
allow_download: bool,
Expand All @@ -663,95 +663,101 @@ impl LayerInner {
let mut init_permit = None;

loop {
let download = move |permit| async move {
// disable any scheduled but not yet running eviction deletions for this
let next_version = 1 + self.version.fetch_add(1, Ordering::Relaxed);
let download = move |permit| {
async move {
// disable any scheduled but not yet running eviction deletions for this
let next_version = 1 + self.version.fetch_add(1, Ordering::Relaxed);

// count cancellations, which currently remain largely unexpected
let init_cancelled =
scopeguard::guard((), |_| LAYER_IMPL_METRICS.inc_init_cancelled());
// count cancellations, which currently remain largely unexpected
let init_cancelled =
scopeguard::guard((), |_| LAYER_IMPL_METRICS.inc_init_cancelled());

// no need to make the evict_and_wait wait for the actual download to complete
drop(self.status.send(Status::Downloaded));
// no need to make the evict_and_wait wait for the actual download to complete
drop(self.status.send(Status::Downloaded));

let timeline = self
.timeline
.upgrade()
.ok_or_else(|| DownloadError::TimelineShutdown)?;
let timeline = self
.timeline
.upgrade()
.ok_or_else(|| DownloadError::TimelineShutdown)?;

// FIXME: grab a gate
// FIXME: grab a gate

let can_ever_evict = timeline.remote_client.as_ref().is_some();
let can_ever_evict = timeline.remote_client.as_ref().is_some();

// check if we really need to be downloaded; could have been already downloaded by a
// cancelled previous attempt.
let needs_download = self
.needs_download()
.await
.map_err(DownloadError::PreStatFailed)?;
// check if we really need to be downloaded; could have been already downloaded by a
// cancelled previous attempt.
let needs_download = self
.needs_download()
.await
.map_err(DownloadError::PreStatFailed)?;

let permit = if let Some(reason) = needs_download {
if let NeedsDownload::NotFile(ft) = reason {
return Err(DownloadError::NotFile(ft));
}
let permit = if let Some(reason) = needs_download {
if let NeedsDownload::NotFile(ft) = reason {
return Err(DownloadError::NotFile(ft));
}

// only reset this after we've decided we really need to download. otherwise it'd
// be impossible to mark cancelled downloads for eviction, like one could imagine
// we would like to do for prefetching which was not needed.
self.wanted_evicted.store(false, Ordering::Release);
// only reset this after we've decided we really need to download. otherwise it'd
// be impossible to mark cancelled downloads for eviction, like one could imagine
// we would like to do for prefetching which was not needed.
self.wanted_evicted.store(false, Ordering::Release);

if !can_ever_evict {
return Err(DownloadError::NoRemoteStorage);
}
if !can_ever_evict {
return Err(DownloadError::NoRemoteStorage);
}

if let Some(ctx) = ctx {
self.check_expected_download(ctx)?;
}
if let Some(ctx) = ctx {
self.check_expected_download(ctx)?;
}

if !allow_download {
// this does look weird, but for LayerInner the "downloading" means also changing
// internal once related state ...
return Err(DownloadError::DownloadRequired);
}
if !allow_download {
// this does look weird, but for LayerInner the "downloading" means also changing
// internal once related state ...
return Err(DownloadError::DownloadRequired);
}

tracing::info!(%reason, "downloading on-demand");
tracing::info!(%reason, "downloading on-demand");

self.spawn_download_and_wait(timeline, permit).await?
} else {
// the file is present locally, probably by a previous but cancelled call to
// get_or_maybe_download. alternatively we might be running without remote storage.
LAYER_IMPL_METRICS.inc_init_needed_no_download();
self.spawn_download_and_wait(timeline, permit).await?
} else {
// the file is present locally, probably by a previous but cancelled call to
// get_or_maybe_download. alternatively we might be running without remote storage.
LAYER_IMPL_METRICS.inc_init_needed_no_download();

permit
};

let since_last_eviction =
self.last_evicted_at.lock().unwrap().map(|ts| ts.elapsed());
if let Some(since_last_eviction) = since_last_eviction {
// FIXME: this will not always be recorded correctly until #6028 (the no
// download needed branch above)
LAYER_IMPL_METRICS.record_redownloaded_after(since_last_eviction);
}

permit
};
let res = Arc::new(DownloadedLayer {
owner: Arc::downgrade(self),
kind: tokio::sync::OnceCell::default(),
version: next_version,
});

let since_last_eviction =
self.last_evicted_at.lock().unwrap().map(|ts| ts.elapsed());
if let Some(since_last_eviction) = since_last_eviction {
// FIXME: this will not always be recorded correctly until #6028 (the no
// download needed branch above)
LAYER_IMPL_METRICS.record_redownloaded_after(since_last_eviction);
}
self.access_stats.record_residence_event(
LayerResidenceStatus::Resident,
LayerResidenceEventReason::ResidenceChange,
);

let res = Arc::new(DownloadedLayer {
owner: Arc::downgrade(self),
kind: tokio::sync::OnceCell::default(),
version: next_version,
});
let waiters = self.inner.initializer_count();
if waiters > 0 {
tracing::info!(
waiters,
"completing the on-demand download for other tasks"
);
}

self.access_stats.record_residence_event(
LayerResidenceStatus::Resident,
LayerResidenceEventReason::ResidenceChange,
);
scopeguard::ScopeGuard::into_inner(init_cancelled);

let waiters = self.inner.initializer_count();
if waiters > 0 {
tracing::info!(waiters, "completing the on-demand download for other tasks");
Ok((ResidentOrWantedEvicted::Resident(res), permit))
}

scopeguard::ScopeGuard::into_inner(init_cancelled);

Ok((ResidentOrWantedEvicted::Resident(res), permit))
.instrument(tracing::info_span!("get_or_maybe_download", layer=%self))
};

if let Some(init_permit) = init_permit.take() {
Expand Down
4 changes: 2 additions & 2 deletions test_runner/regress/test_broken_timeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_local_corruption(neon_env_builder: NeonEnvBuilder):

env.pageserver.allowed_errors.extend(
[
".*layer loading failed:.*",
".*get_value_reconstruct_data for layer .*",
".*could not find data for key.*",
".*is not active. Current state: Broken.*",
".*will not become active. Current state: Broken.*",
Expand Down Expand Up @@ -83,7 +83,7 @@ def test_local_corruption(neon_env_builder: NeonEnvBuilder):
# (We don't check layer file contents on startup, when loading the timeline)
#
# This will change when we implement checksums for layers
with pytest.raises(Exception, match="layer loading failed:") as err:
with pytest.raises(Exception, match="get_value_reconstruct_data for layer ") as err:
pg2.start()
log.info(
f"As expected, compute startup failed for timeline {tenant2}/{timeline2} with corrupt layers: {err}"
Expand Down
Loading