Skip to content

Commit

Permalink
pageserver: fix early bail out in vectored get (#7038)
Browse files Browse the repository at this point in the history
## Problem
When vectored get encountered a portion of the key range that could
not be mapped to any layer in the current timeline it would incorrectly
bail out of the current timeline. This is incorrect since we may have
had layers queued for a visit in the fringe.

## Summary of changes
* Add a repro unit test
* Remove the early bail out path
* Simplify range search return value
  • Loading branch information
VladLazar committed Mar 7, 2024
1 parent 602a4da commit 871977f
Show file tree
Hide file tree
Showing 3 changed files with 176 additions and 22 deletions.
165 changes: 156 additions & 9 deletions pageserver/src/tenant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3679,7 +3679,10 @@ pub(crate) mod harness {
}

impl TenantHarness {
pub fn create(test_name: &'static str) -> anyhow::Result<Self> {
pub fn create_custom(
test_name: &'static str,
tenant_conf: TenantConf,
) -> anyhow::Result<Self> {
setup_logging();

let repo_dir = PageServerConf::test_repo_dir(test_name);
Expand All @@ -3691,14 +3694,6 @@ pub(crate) mod harness {
// OK in a test.
let conf: &'static PageServerConf = Box::leak(Box::new(conf));

// Disable automatic GC and compaction to make the unit tests more deterministic.
// The tests perform them manually if needed.
let tenant_conf = TenantConf {
gc_period: Duration::ZERO,
compaction_period: Duration::ZERO,
..TenantConf::default()
};

let tenant_id = TenantId::generate();
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
fs::create_dir_all(conf.tenant_path(&tenant_shard_id))?;
Expand Down Expand Up @@ -3726,6 +3721,18 @@ pub(crate) mod harness {
})
}

pub fn create(test_name: &'static str) -> anyhow::Result<Self> {
// Disable automatic GC and compaction to make the unit tests more deterministic.
// The tests perform them manually if needed.
let tenant_conf = TenantConf {
gc_period: Duration::ZERO,
compaction_period: Duration::ZERO,
..TenantConf::default()
};

Self::create_custom(test_name, tenant_conf)
}

pub fn span(&self) -> tracing::Span {
info_span!("TenantHarness", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug())
}
Expand Down Expand Up @@ -3833,6 +3840,7 @@ mod tests {
use crate::keyspace::KeySpaceAccum;
use crate::repository::{Key, Value};
use crate::tenant::harness::*;
use crate::tenant::timeline::CompactFlags;
use crate::DEFAULT_PG_VERSION;
use bytes::BytesMut;
use hex_literal::hex;
Expand Down Expand Up @@ -4637,6 +4645,145 @@ mod tests {
Ok(())
}

// Test that vectored get handles layer gaps correctly
// by advancing into the next ancestor timeline if required.
//
// The test generates timelines that look like the diagram below.
// We leave a gap in one of the L1 layers at `gap_at_key` (`/` in the diagram).
// The reconstruct data for that key lies in the ancestor timeline (`X` in the diagram).
//
// ```
//-------------------------------+
// ... |
// [ L1 ] |
// [ / L1 ] | Child Timeline
// ... |
// ------------------------------+
// [ X L1 ] | Parent Timeline
// ------------------------------+
// ```
#[tokio::test]
async fn test_get_vectored_key_gap() -> anyhow::Result<()> {
let tenant_conf = TenantConf {
// Make compaction deterministic
gc_period: Duration::ZERO,
compaction_period: Duration::ZERO,
// Encourage creation of L1 layers
checkpoint_distance: 16 * 1024,
compaction_target_size: 8 * 1024,
..TenantConf::default()
};

let harness = TenantHarness::create_custom("test_get_vectored_key_gap", tenant_conf)?;
let (tenant, ctx) = harness.load().await;

let mut current_key = Key::from_hex("010000000033333333444444445500000000").unwrap();
let gap_at_key = current_key.add(100);
let mut current_lsn = Lsn(0x10);

const KEY_COUNT: usize = 10_000;

let timeline_id = TimelineId::generate();
let current_timeline = tenant
.create_test_timeline(timeline_id, current_lsn, DEFAULT_PG_VERSION, &ctx)
.await?;

current_lsn += 0x100;

let writer = current_timeline.writer().await;
writer
.put(
gap_at_key,
current_lsn,
&Value::Image(test_img(&format!("{} at {}", gap_at_key, current_lsn))),
&ctx,
)
.await?;
writer.finish_write(current_lsn);
drop(writer);

let mut latest_lsns = HashMap::new();
latest_lsns.insert(gap_at_key, current_lsn);

current_timeline.freeze_and_flush().await?;

let child_timeline_id = TimelineId::generate();

tenant
.branch_timeline_test(
&current_timeline,
child_timeline_id,
Some(current_lsn),
&ctx,
)
.await?;
let child_timeline = tenant
.get_timeline(child_timeline_id, true)
.expect("Should have the branched timeline");

for i in 0..KEY_COUNT {
if current_key == gap_at_key {
current_key = current_key.next();
continue;
}

current_lsn += 0x10;

let writer = child_timeline.writer().await;
writer
.put(
current_key,
current_lsn,
&Value::Image(test_img(&format!("{} at {}", current_key, current_lsn))),
&ctx,
)
.await?;
writer.finish_write(current_lsn);
drop(writer);

latest_lsns.insert(current_key, current_lsn);
current_key = current_key.next();

// Flush every now and then to encourage layer file creation.
if i % 500 == 0 {
child_timeline.freeze_and_flush().await?;
}
}

child_timeline.freeze_and_flush().await?;
let mut flags = EnumSet::new();
flags.insert(CompactFlags::ForceRepartition);
child_timeline
.compact(&CancellationToken::new(), flags, &ctx)
.await?;

let key_near_end = {
let mut tmp = current_key;
tmp.field6 -= 10;
tmp
};

let key_near_gap = {
let mut tmp = gap_at_key;
tmp.field6 -= 10;
tmp
};

let read = KeySpace {
ranges: vec![key_near_gap..gap_at_key.next(), key_near_end..current_key],
};
let results = child_timeline
.get_vectored_impl(read.clone(), current_lsn, &ctx)
.await?;

for (key, img_res) in results {
let expected = test_img(&format!("{} at {}", key, latest_lsns[&key]));
assert_eq!(img_res?, expected);
}

Ok(())
}

#[tokio::test]
async fn test_random_updates() -> anyhow::Result<()> {
let harness = TenantHarness::create("test_random_updates")?;
Expand Down
24 changes: 18 additions & 6 deletions pageserver/src/tenant/layer_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -460,15 +460,22 @@ impl LayerMap {
}
}

pub fn range_search(&self, key_range: Range<Key>, end_lsn: Lsn) -> Option<RangeSearchResult> {
let version = self.historic.get().unwrap().get_version(end_lsn.0 - 1)?;
pub fn range_search(&self, key_range: Range<Key>, end_lsn: Lsn) -> RangeSearchResult {
let version = match self.historic.get().unwrap().get_version(end_lsn.0 - 1) {
Some(version) => version,
None => {
let mut result = RangeSearchResult::new();
result.not_found.add_range(key_range);
return result;
}
};

let raw_range = key_range.start.to_i128()..key_range.end.to_i128();
let delta_changes = version.delta_coverage.range_overlaps(&raw_range);
let image_changes = version.image_coverage.range_overlaps(&raw_range);

let collector = RangeSearchCollector::new(key_range, end_lsn, delta_changes, image_changes);
Some(collector.collect())
collector.collect()
}

/// Start a batch of updates, applied on drop
Expand Down Expand Up @@ -995,8 +1002,13 @@ mod tests {
let layer_map = LayerMap::default();
let range = Key::from_i128(100)..Key::from_i128(200);

let res = layer_map.range_search(range, Lsn(100));
assert!(res.is_none());
let res = layer_map.range_search(range.clone(), Lsn(100));
assert_eq!(
res.not_found.to_keyspace(),
KeySpace {
ranges: vec![range]
}
);
}

#[test]
Expand Down Expand Up @@ -1033,7 +1045,7 @@ mod tests {
for start in 0..60 {
for end in (start + 1)..60 {
let range = Key::from_i128(start)..Key::from_i128(end);
let result = layer_map.range_search(range.clone(), Lsn(100)).unwrap();
let result = layer_map.range_search(range.clone(), Lsn(100));
let expected = brute_force_range_search(&layer_map, range, Lsn(100));

assert_range_search_result_eq(result, expected);
Expand Down
9 changes: 2 additions & 7 deletions pageserver/src/tenant/timeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2784,7 +2784,7 @@ impl Timeline {
let guard = timeline.layers.read().await;
let layers = guard.layer_map();

'outer: loop {
loop {
if cancel.is_cancelled() {
return Err(GetVectoredError::Cancelled);
}
Expand All @@ -2810,12 +2810,7 @@ impl Timeline {
}
None => {
for range in unmapped_keyspace.ranges.iter() {
let results = match layers.range_search(range.clone(), cont_lsn) {
Some(res) => res,
None => {
break 'outer;
}
};
let results = layers.range_search(range.clone(), cont_lsn);

results
.found
Expand Down

1 comment on commit 871977f

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

2570 tests run: 2435 passed, 0 failed, 135 skipped (full report)


Flaky tests (1)

Postgres 16

  • test_compute_pageserver_connection_stress: debug

Code coverage* (full report)

  • functions: 28.8% (7000 of 24315 functions)
  • lines: 47.5% (43198 of 91017 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
871977f at 2024-03-07T16:57:00.094Z :recycle:

Please sign in to comment.