Skip to content

Commit

Permalink
Make shard.rs easier to read
Browse files Browse the repository at this point in the history
  • Loading branch information
jcsp committed Apr 5, 2024
1 parent 66fc465 commit be6ad43
Showing 1 changed file with 94 additions and 51 deletions.
145 changes: 94 additions & 51 deletions libs/pageserver_api/src/shard.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,89 @@ use hex::FromHex;
use serde::{Deserialize, Serialize};
use utils::id::TenantId;

/// See docs/rfcs/031-sharding-static.md for an overview of sharding.
///
/// This module contains a variety of types used to represent the concept of sharding
/// a Neon tenant across multiple physical shards. Since there are quite a few of these,
/// we provide an summary here.
///
/// Types used to describe shards:
/// - [`ShardCount`] describes how many shards make up a tenant, plus the magic `unsharded` value
/// which identifies a tenant which is not shard-aware. This means its storage paths do not include
/// a shard suffix.
/// - [`ShardNumber`] is simply the zero-based index of a shard within a tenant.
/// - [`ShardIndex`] is the 2-tuple of `ShardCount` and `ShardNumber`, it's just like a `TenantShardId`
/// without the tenant ID. This is useful for things that are implicitly scoped to a particular
/// tenant, such as layer files.
/// - [`ShardIdentity`]` is the full description of a particular shard's parameters, in sufficient
/// detail to
/// - The [`ShardSlug`] is a terse formatter for of ShardCount and ShardNumber, written as
/// four hex digits. An unsharded tenant is `0000`.
/// - [`TenantShardId`] is the unique ID of a particular shard within a particular tenant
///
/// Types used to describe the parameters for data distribution in a sharded tenant:
/// - [`ShardStripeSize`] controls how long contiguous runs of [`Key`]s are when distributed across
/// multiple shards. Its value is given in 8kiB pages.
/// - [`ShardLayout`] describes the data distribution scheme, and at time of writing is
/// always zero: this is provided for future upgrades that might introduce different
/// data distribution schemes.
///
/// Examples:
/// - A legacy unsharded tenant has one shard with ShardCount(0), ShardNumber(0), and its slug is 0000
/// - A single sharded tenant has one shard with ShardCount(1), ShardNumber(0), and its slug is 0001
/// - In a tenant with 4 shards, each shard has ShardCount(N), ShardNumber(i) where i in 0..N-1 (inclusive),
/// and their slugs are 0004, 0104, 0204, and 0304.

#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
pub struct ShardNumber(pub u8);

#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
pub struct ShardCount(u8);

/// Combination of ShardNumber and ShardCount. For use within the context of a particular tenant,
/// when we need to know which shard we're dealing with, but do not need to know the full
/// ShardIdentity (because we won't be doing any page->shard mapping), and do not need to know
/// the fully qualified TenantShardId.
#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
pub struct ShardIndex {
pub shard_number: ShardNumber,
pub shard_count: ShardCount,
}

/// The ShardIdentity contains enough information to map a [`Key`] to a [`ShardNumber`],
/// and to check whether that [`ShardNumber`] is the same as the current shard.
#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)]
pub struct ShardIdentity {
pub number: ShardNumber,
pub count: ShardCount,
pub stripe_size: ShardStripeSize,
layout: ShardLayout,
}

/// Formatting helper, for generatin the `shard_id` label in traces.
struct ShardSlug<'a>(&'a TenantShardId);

/// TenantShardId globally identifies a particular shard in a particular tenant.
///
/// These are written as `<TenantId>-<ShardSlug>`, for example:
/// # The second shard in a two-shard tenant
/// 072f1291a5310026820b2fe4b2968934-0102
///
/// If the `ShardCount` is _unsharded_, the `TenantShardId` is written without
/// a shard suffix and is equivalent to the encoding of a `TenantId`: this enables
/// an unsharded [`TenantShardId`] to be used interchangably with a [`TenantId`].
///
/// The human-readable encoding of an unsharded TenantShardId, such as used in API URLs,
/// is both forward and backward compatible with TenantId: a legacy TenantId can be
/// decoded as a TenantShardId, and when re-encoded it will be parseable
/// as a TenantId.
#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
pub struct TenantShardId {
pub tenant_id: TenantId,
pub shard_number: ShardNumber,
pub shard_count: ShardCount,
}

impl ShardCount {
pub const MAX: Self = Self(u8::MAX);

Expand All @@ -38,6 +115,7 @@ impl ShardCount {
self.0
}

///
pub fn is_unsharded(&self) -> bool {
self.0 == 0
}
Expand All @@ -53,33 +131,6 @@ impl ShardNumber {
pub const MAX: Self = Self(u8::MAX);
}

/// TenantShardId identify the units of work for the Pageserver.
///
/// These are written as `<tenant_id>-<shard number><shard-count>`, for example:
///
/// # The second shard in a two-shard tenant
/// 072f1291a5310026820b2fe4b2968934-0102
///
/// Historically, tenants could not have multiple shards, and were identified
/// by TenantId. To support this, TenantShardId has a special legacy
/// mode where `shard_count` is equal to zero: this represents a single-sharded
/// tenant which should be written as a TenantId with no suffix.
///
/// The human-readable encoding of TenantShardId, such as used in API URLs,
/// is both forward and backward compatible: a legacy TenantId can be
/// decoded as a TenantShardId, and when re-encoded it will be parseable
/// as a TenantId.
///
/// Note that the binary encoding is _not_ backward compatible, because
/// at the time sharding is introduced, there are no existing binary structures
/// containing TenantId that we need to handle.
#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
pub struct TenantShardId {
pub tenant_id: TenantId,
pub shard_number: ShardNumber,
pub shard_count: ShardCount,
}

impl TenantShardId {
pub fn unsharded(tenant_id: TenantId) -> Self {
Self {
Expand Down Expand Up @@ -115,6 +166,9 @@ impl TenantShardId {
self.shard_number == ShardNumber(0)
}

/// The "unsharded" value is distinct from simply having a single shard: it represents
/// a tenant which is not shard-aware at all, and whose storage paths will not include
/// a shard suffix.
pub fn is_unsharded(&self) -> bool {
self.shard_number == ShardNumber(0) && self.shard_count.is_unsharded()
}
Expand Down Expand Up @@ -150,9 +204,6 @@ impl TenantShardId {
}
}

/// Formatting helper
struct ShardSlug<'a>(&'a TenantShardId);

impl<'a> std::fmt::Display for ShardSlug<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
Expand Down Expand Up @@ -222,16 +273,6 @@ impl From<[u8; 18]> for TenantShardId {
}
}

/// For use within the context of a particular tenant, when we need to know which
/// shard we're dealing with, but do not need to know the full ShardIdentity (because
/// we won't be doing any page->shard mapping), and do not need to know the fully qualified
/// TenantShardId.
#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
pub struct ShardIndex {
pub shard_number: ShardNumber,
pub shard_count: ShardCount,
}

impl ShardIndex {
pub fn new(number: ShardNumber, count: ShardCount) -> Self {
Self {
Expand All @@ -246,6 +287,9 @@ impl ShardIndex {
}
}

/// The "unsharded" value is distinct from simply having a single shard: it represents
/// a tenant which is not shard-aware at all, and whose storage paths will not include
/// a shard suffix.
pub fn is_unsharded(&self) -> bool {
self.shard_number == ShardNumber(0) && self.shard_count == ShardCount(0)
}
Expand Down Expand Up @@ -313,6 +357,8 @@ impl Serialize for TenantShardId {
if serializer.is_human_readable() {
serializer.collect_str(self)
} else {
// Note: while human encoding of [`TenantShardId`] is backward and forward
// compatible, this binary encoding is not.
let mut packed: [u8; 18] = [0; 18];
packed[0..16].clone_from_slice(&self.tenant_id.as_arr());
packed[16] = self.shard_number.0;
Expand Down Expand Up @@ -390,16 +436,6 @@ const LAYOUT_BROKEN: ShardLayout = ShardLayout(255);
/// Default stripe size in pages: 256MiB divided by 8kiB page size.
const DEFAULT_STRIPE_SIZE: ShardStripeSize = ShardStripeSize(256 * 1024 / 8);

/// The ShardIdentity contains the information needed for one member of map
/// to resolve a key to a shard, and then check whether that shard is ==self.
#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)]
pub struct ShardIdentity {
pub number: ShardNumber,
pub count: ShardCount,
pub stripe_size: ShardStripeSize,
layout: ShardLayout,
}

#[derive(thiserror::Error, Debug, PartialEq, Eq)]
pub enum ShardConfigError {
#[error("Invalid shard count")]
Expand Down Expand Up @@ -439,6 +475,9 @@ impl ShardIdentity {
}
}

/// The "unsharded" value is distinct from simply having a single shard: it represents
/// a tenant which is not shard-aware at all, and whose storage paths will not include
/// a shard suffix.
pub fn is_unsharded(&self) -> bool {
self.number == ShardNumber(0) && self.count == ShardCount(0)
}
Expand Down Expand Up @@ -487,6 +526,8 @@ impl ShardIdentity {
}

/// Return true if the key should be ingested by this shard
///
/// Shards must ingest _at least_ keys which return true from this check.
pub fn is_key_local(&self, key: &Key) -> bool {
assert!(!self.is_broken());
if self.count < ShardCount(2) || (key_is_shard0(key) && self.number == ShardNumber(0)) {
Expand All @@ -497,7 +538,9 @@ impl ShardIdentity {
}

/// Return true if the key should be discarded if found in this shard's
/// data store, e.g. during compaction after a split
/// data store, e.g. during compaction after a split.
///
/// Shards _may_ drop keys which return false here, but are not obliged to.
pub fn is_key_disposable(&self, key: &Key) -> bool {
if key_is_shard0(key) {
// Q: Why can't we dispose of shard0 content if we're not shard 0?
Expand Down

0 comments on commit be6ad43

Please sign in to comment.