pageserver: fix cont lsn jump on vectored read path
VladLazar committed Apr 18, 2024
1 parent 13b9135 commit ee25fdf
Showing 2 changed files with 173 additions and 2 deletions.
172 changes: 171 additions & 1 deletion pageserver/src/tenant.rs
@@ -3848,6 +3848,8 @@ pub(crate) mod harness {

#[cfg(test)]
mod tests {
use std::collections::BTreeMap;

use super::*;
use crate::keyspace::KeySpaceAccum;
use crate::repository::{Key, Value};
@@ -3858,7 +3860,7 @@ mod tests {
use hex_literal::hex;
use pageserver_api::keyspace::KeySpace;
use rand::{thread_rng, Rng};
use tests::timeline::ShutdownMode;
use tests::timeline::{GetVectoredError, ShutdownMode};

static TEST_KEY: Lazy<Key> =
Lazy::new(|| Key::from_slice(&hex!("010000000033333333444444445500000001")));
@@ -4794,6 +4796,174 @@ mod tests {
Ok(())
}

// Test that vectored get descends into ancestor timelines correctly and
// does not return an image that's newer than requested.
//
// The diagram below illustrates an interesting case. We have a parent timeline
// (top of the Lsn range) and a child timeline. The requested key cannot be reconstructed
// from the child timeline, so the parent timeline must be visited. When advancing into
// the child timeline, the read path needs to remember what the requested Lsn was in
// order to avoid returning an image that's too new. The test below constructs such
// a timeline setup and does a few queries around the Lsn of each page image.
// ```
// LSN
// ^
// |
// |
// 500 | --------------------------------------> branch point
// 400 | X
// 300 | X
// 200 | --------------------------------------> requested lsn
// 100 | X
// |---------------------------------------> Key
// |
// ------> requested key
//
// Legend:
// * X - page images
// ```
#[tokio::test]
async fn test_get_vectored_ancestor_descent() -> anyhow::Result<()> {
let tenant_conf = TenantConf {
// Make compaction deterministic
gc_period: Duration::ZERO,
compaction_period: Duration::ZERO,
..TenantConf::default()
};

let harness = TenantHarness::create_custom("test_get_vectored_on_lsn_axis", tenant_conf)?;
let (tenant, ctx) = harness.load().await;

let start_key = Key::from_hex("010000000033333333444444445500000000").unwrap();
let end_key = start_key.add(1000);
let child_gap_at_key = start_key.add(500);
let mut parent_gap_lsns: BTreeMap<Lsn, String> = BTreeMap::new();

let mut current_lsn = Lsn(0x10);

let timeline_id = TimelineId::generate();
let parent_timeline = tenant
.create_test_timeline(timeline_id, current_lsn, DEFAULT_PG_VERSION, &ctx)
.await?;

current_lsn += 0x100;

for _ in 0..3 {
let mut key = start_key;
while key < end_key {
current_lsn += 0x10;

let image_value = format!("{} at {}", child_gap_at_key, current_lsn);

let mut writer = parent_timeline.writer().await;
writer
.put(
key,
current_lsn,
&Value::Image(test_img(&image_value)),
&ctx,
)
.await?;
writer.finish_write(current_lsn);

if key == child_gap_at_key {
parent_gap_lsns.insert(current_lsn, image_value);
}

key = key.next();
}

parent_timeline.freeze_and_flush().await?;
}

let child_timeline_id = TimelineId::generate();

tenant
.branch_timeline_test(&parent_timeline, child_timeline_id, Some(current_lsn), &ctx)
.await?;

let child_timeline = tenant
.get_timeline(child_timeline_id, true)
.expect("Should have the branched timeline");

let mut key = start_key;
while key < end_key {
if key == child_gap_at_key {
key = key.next();
continue;
}

current_lsn += 0x10;

let mut writer = child_timeline.writer().await;
writer
.put(
key,
current_lsn,
&Value::Image(test_img(&format!("{} at {}", key, current_lsn))),
&ctx,
)
.await?;
writer.finish_write(current_lsn);

key = key.next();
}

child_timeline.freeze_and_flush().await?;

let lsn_offsets: [i64; 5] = [-10, -1, 0, 1, 10];
let mut query_lsns = Vec::new();
for image_lsn in parent_gap_lsns.keys().rev() {
for offset in lsn_offsets {
query_lsns.push(Lsn(u64::try_from(i64::try_from(image_lsn.0)? + offset)?))
}
}

for query_lsn in query_lsns {
let results = child_timeline
.get_vectored_impl(
KeySpace {
ranges: vec![child_gap_at_key..child_gap_at_key.next()],
},
query_lsn,
&ctx,
)
.await;

let expected_item = parent_gap_lsns
.iter()
.rev()
.find(|(lsn, _)| **lsn <= query_lsn);

info!(
"Doing vectored read at LSN {}. Expecting image to be: {:?}",
query_lsn, expected_item
);

match expected_item {
Some((_, img_value)) => {
let key_results = results.expect("No vectored get error expected");
let key_result = &key_results[&child_gap_at_key];
let returned_img = key_result
.as_ref()
.expect("No page reconstruct error expected");

info!(
"Vectored read at LSN {} returned image {}",
query_lsn,
std::str::from_utf8(returned_img)?
);
assert_eq!(*returned_img, test_img(img_value));
}
None => {
assert!(matches!(results, Err(GetVectoredError::MissingKey(_))));
}
}
}

Ok(())
}

#[tokio::test]
async fn test_random_updates() -> anyhow::Result<()> {
let harness = TenantHarness::create("test_random_updates")?;
3 changes: 2 additions & 1 deletion pageserver/src/tenant/timeline.rs
@@ -2968,7 +2968,8 @@ impl Timeline {
break;
}

cont_lsn = Lsn(timeline.ancestor_lsn.0 + 1);
// Take the min to avoid reconstructing a page with data newer than the requested Lsn.
cont_lsn = std::cmp::min(Lsn(request_lsn.0 + 1), Lsn(timeline.ancestor_lsn.0 + 1));
timeline_owned = timeline
.get_ready_ancestor_timeline(ctx)
.await
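The one-line change above is easiest to see outside the diff. The sketch below is not pageserver code — `Lsn`, `Timeline`, and `lookup` here are simplified stand-ins — but it reproduces the scenario from the new test: a child timeline branched at Lsn 500 with a gap at the requested key, and a parent holding images for that key at Lsn 100, 300 and 400. With the old `cont_lsn = ancestor_lsn + 1` assignment, a read at Lsn 200 that descends into the parent could pick up the image written at 400; clamping with `min` keeps the exclusive bound at `request_lsn + 1`.

```rust
use std::collections::BTreeMap;

#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct Lsn(u64);

/// Toy timeline: per-key page images indexed by the Lsn they were written at,
/// plus an optional (ancestor, branch-point Lsn) link.
struct Timeline<'a> {
    images: BTreeMap<u64, BTreeMap<Lsn, &'static str>>,
    ancestor: Option<(&'a Timeline<'a>, Lsn)>,
}

/// Newest image for `key` visible at `request_lsn`, descending into ancestors.
/// `cont_lsn` is an exclusive upper bound, as on the vectored read path.
fn lookup(mut tl: &Timeline<'_>, key: u64, request_lsn: Lsn) -> Option<&'static str> {
    let mut cont_lsn = Lsn(request_lsn.0 + 1);
    loop {
        if let Some(per_key) = tl.images.get(&key) {
            // Newest image strictly below the exclusive bound.
            if let Some((_, img)) = per_key.range(..cont_lsn).next_back() {
                return Some(img);
            }
        }
        let (parent, ancestor_lsn) = tl.ancestor?;
        // The fix under test: clamp with `min` so descending past the branch point
        // cannot raise the bound above the requested Lsn.
        cont_lsn = std::cmp::min(Lsn(request_lsn.0 + 1), Lsn(ancestor_lsn.0 + 1));
        tl = parent;
    }
}

fn main() {
    // Parent holds images for key 5 at Lsn 100, 300 and 400; the child branches
    // at Lsn 500 and has no image for that key, mirroring the test's diagram.
    let mut parent_images = BTreeMap::new();
    parent_images.insert(
        5u64,
        BTreeMap::from([(Lsn(100), "at 100"), (Lsn(300), "at 300"), (Lsn(400), "at 400")]),
    );
    let parent = Timeline { images: parent_images, ancestor: None };
    let child = Timeline { images: BTreeMap::new(), ancestor: Some((&parent, Lsn(500))) };

    // A request at Lsn 200 must see the image written at 100, not the newer one at 400.
    assert_eq!(lookup(&child, 5, Lsn(200)), Some("at 100"));
    // Below the oldest image nothing is visible (the test expects MissingKey here).
    assert_eq!(lookup(&child, 5, Lsn(90)), None);
    println!("clamping cont_lsn keeps ancestor reads at or below the requested Lsn");
}
```

In this sketch, reverting the `min` to a plain `ancestor_lsn + 1` assignment makes the first assertion fail, which is the regression the new test guards against.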
