Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[reconfigurator] Decommission sleds #5698

Merged
merged 21 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
adec837
Add sled_state map to blueprints
jgallagher Apr 29, 2024
ab67000
update blueprint db serialization
jgallagher Apr 29, 2024
6faa77e
expectorate openapi
jgallagher Apr 29, 2024
e985d91
schema migration: backfill sled states for past blueprints
jgallagher Apr 29, 2024
d795f57
Merge branch 'main' into john/blueprint-store-sled-state
jgallagher May 1, 2024
0330f88
comment cleanup
jgallagher May 1, 2024
ae4f771
expectorate comment update
jgallagher May 1, 2024
4949eac
rework VPC/blueprint resolution test to use BlueprintBuilder
jgallagher May 1, 2024
36fde78
rename SledFilter::All -> ::Commissioned, omitting decommissioned sleds
jgallagher May 2, 2024
036f030
planner: mark sleds decommissioned
jgallagher May 2, 2024
aa715ad
add basic test for marking sleds decommissioned
jgallagher May 2, 2024
6eadb5f
decommissioning a sled decommissions its disks
jgallagher May 3, 2024
30bff33
blueprint realization: decommission sleds
jgallagher May 3, 2024
e6e6fef
prune fully decommissioned sleds from blueprints
jgallagher May 3, 2024
f1ce720
Merge branch 'main' into john/blueprint-store-sled-state
jgallagher May 6, 2024
7aabc98
Merge branch 'john/blueprint-store-sled-state' into john/mark-sleds-d…
jgallagher May 6, 2024
51e38c5
Merge branch 'main' into john/mark-sleds-decommissioned
jgallagher May 13, 2024
8d08368
clarify omdb db sleds output
jgallagher May 13, 2024
64e8660
clarify BlueprintBuilder::into_zones_map()
jgallagher May 13, 2024
de8a027
minor cleanup from PR feedback
jgallagher May 13, 2024
1e629df
planner: finding a decommissioned sleds with non-expunged zones is an…
jgallagher May 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dev-tools/omdb/src/bin/omdb/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1456,7 +1456,7 @@ async fn cmd_db_sleds(
Some(filter) => filter,
None => {
eprintln!("note: listing all sleds (use -F to filter, e.g. -F in-service)");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note likely needs to be changed

Copy link
Contributor Author

@jgallagher jgallagher May 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed the wording here to note it's "all commissioned sleds" in 8d08368. The follow-on PR to this (#5733) adds a SledFilter::Decommissioned that allows us to see the sleds that are left out by default here.

SledFilter::All
SledFilter::Commissioned
}
};

Expand Down
2 changes: 1 addition & 1 deletion dev-tools/omdb/tests/test_all_output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) {
// collection?
assert!(parsed
.planning_input
.all_sled_ids(SledFilter::All)
.all_sled_ids(SledFilter::Commissioned)
.next()
.is_some());
assert!(!parsed.collections.is_empty());
Expand Down
2 changes: 1 addition & 1 deletion dev-tools/omdb/tests/usage_errors.out
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ Options:
Show sleds that match the given filter

Possible values:
- all: All sleds
- commissioned: All sleds that are currently part of the control plane cluster
- discretionary: Sleds that are eligible for discretionary services
- in-service: Sleds that are in service (even if they might not be eligible
for discretionary services)
Expand Down
8 changes: 4 additions & 4 deletions dev-tools/reconfigurator-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -559,7 +559,7 @@ fn cmd_sled_list(
.to_planning_input_builder()
.context("failed to generate planning input")?
.build();
let rows = planning_input.all_sled_resources(SledFilter::All).map(
let rows = planning_input.all_sled_resources(SledFilter::Commissioned).map(
|(sled_id, sled_resources)| Sled {
id: sled_id,
subnet: sled_resources.subnet.net().to_string(),
Expand Down Expand Up @@ -648,7 +648,7 @@ fn cmd_inventory_generate(
// has no zones on it.
let planning_input =
sim.system.to_planning_input_builder().unwrap().build();
for sled_id in planning_input.all_sled_ids(SledFilter::All) {
for sled_id in planning_input.all_sled_ids(SledFilter::Commissioned) {
builder
.found_sled_omicron_zones(
"fake sled agent",
Expand Down Expand Up @@ -1077,7 +1077,7 @@ fn cmd_load(
.context("generating planning input")?
.build();
for (sled_id, sled_details) in
loaded.planning_input.all_sleds(SledFilter::All)
loaded.planning_input.all_sleds(SledFilter::Commissioned)
{
if current_planning_input.sled_resources(&sled_id).is_some() {
swriteln!(
Expand Down Expand Up @@ -1202,7 +1202,7 @@ fn cmd_file_contents(args: FileContentsArgs) -> anyhow::Result<Option<String>> {
let mut s = String::new();

for (sled_id, sled_resources) in
loaded.planning_input.all_sled_resources(SledFilter::All)
loaded.planning_input.all_sled_resources(SledFilter::Commissioned)
{
swriteln!(
s,
Expand Down
15 changes: 13 additions & 2 deletions nexus/db-model/src/deployment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@ use crate::inventory::ZoneType;
use crate::omicron_zone_config::{OmicronZone, OmicronZoneNic};
use crate::schema::{
blueprint, bp_omicron_physical_disk, bp_omicron_zone, bp_omicron_zone_nic,
bp_sled_omicron_physical_disks, bp_sled_omicron_zones, bp_target,
bp_sled_omicron_physical_disks, bp_sled_omicron_zones, bp_sled_state,
bp_target,
};
use crate::typed_uuid::DbTypedUuid;
use crate::{
impl_enum_type, ipv6, Generation, MacAddr, Name, SqlU16, SqlU32, SqlU8,
impl_enum_type, ipv6, Generation, MacAddr, Name, SledState, SqlU16, SqlU32,
SqlU8,
};
use chrono::{DateTime, Utc};
use ipnetwork::IpNetwork;
Expand Down Expand Up @@ -103,6 +105,15 @@ impl From<BpTarget> for nexus_types::deployment::BlueprintTarget {
}
}

/// One row of the `bp_sled_state` table: the state recorded for a single
/// sled within a single blueprint.
///
/// See [`nexus_types::deployment::Blueprint::sled_state`].
#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
#[diesel(table_name = bp_sled_state)]
pub struct BpSledState {
/// ID of the blueprint this row belongs to.
pub blueprint_id: Uuid,
/// ID of the sled whose state is recorded.
pub sled_id: DbTypedUuid<SledKind>,
/// State recorded for the sled in this blueprint.
pub sled_state: SledState,
}

/// See [`nexus_types::deployment::BlueprintPhysicalDisksConfig`].
#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
#[diesel(table_name = bp_sled_omicron_physical_disks)]
Expand Down
9 changes: 9 additions & 0 deletions nexus/db-model/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1459,6 +1459,15 @@ table! {
}
}

// Diesel schema for the per-blueprint sled state table. Rows are keyed by
// the (blueprint_id, sled_id) pair.
table! {
bp_sled_state (blueprint_id, sled_id) {
blueprint_id -> Uuid,
sled_id -> Uuid,

sled_state -> crate::SledStateEnum,
}
}

table! {
bp_sled_omicron_physical_disks (blueprint_id, sled_id) {
blueprint_id -> Uuid,
Expand Down
3 changes: 2 additions & 1 deletion nexus/db-model/src/schema_versions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use std::collections::BTreeMap;
///
/// This must be updated when you change the database schema. Refer to
/// schema/crdb/README.adoc in the root of this repository for details.
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(55, 0, 0);
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(56, 0, 0);

/// List of all past database schema versions, in *reverse* order
///
Expand All @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| {
// | leaving the first copy as an example for the next person.
// v
// KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
KnownVersion::new(56, "blueprint-add-sled-state"),
KnownVersion::new(55, "add-lookup-sled-by-policy-and-state-index"),
KnownVersion::new(54, "blueprint-add-external-ip-id"),
KnownVersion::new(53, "drop-service-table"),
Expand Down
75 changes: 73 additions & 2 deletions nexus/db-queries/src/db/datastore/deployment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,14 @@ use nexus_db_model::BpOmicronZone;
use nexus_db_model::BpOmicronZoneNic;
use nexus_db_model::BpSledOmicronPhysicalDisks;
use nexus_db_model::BpSledOmicronZones;
use nexus_db_model::BpSledState;
use nexus_db_model::BpTarget;
use nexus_types::deployment::Blueprint;
use nexus_types::deployment::BlueprintMetadata;
use nexus_types::deployment::BlueprintPhysicalDisksConfig;
use nexus_types::deployment::BlueprintTarget;
use nexus_types::deployment::BlueprintZonesConfig;
use nexus_types::external_api::views::SledState;
use omicron_common::api::external::DataPageParams;
use omicron_common::api::external::Error;
use omicron_common::api::external::ListResultVec;
Expand Down Expand Up @@ -109,6 +111,16 @@ impl DataStore {
let row_blueprint = DbBlueprint::from(blueprint);
let blueprint_id = row_blueprint.id;

let sled_states = blueprint
.sled_state
.iter()
.map(|(&sled_id, &state)| BpSledState {
blueprint_id,
sled_id: sled_id.into(),
sled_state: state.into(),
})
.collect::<Vec<_>>();

let sled_omicron_physical_disks = blueprint
.blueprint_disks
.iter()
Expand Down Expand Up @@ -187,6 +199,16 @@ impl DataStore {
.await?;
}

// Insert all the sled states for this blueprint.
{
use db::schema::bp_sled_state::dsl as sled_state;

let _ = diesel::insert_into(sled_state::bp_sled_state)
.values(sled_states)
.execute_async(&conn)
.await?;
}

// Insert all physical disks for this blueprint.

{
Expand Down Expand Up @@ -290,6 +312,41 @@ impl DataStore {
)
};

// Load the sled states for this blueprint.
let sled_state: BTreeMap<SledUuid, SledState> = {
use db::schema::bp_sled_state::dsl;

let mut sled_state = BTreeMap::new();
let mut paginator = Paginator::new(SQL_BATCH_SIZE);
while let Some(p) = paginator.next() {
let batch = paginated(
dsl::bp_sled_state,
dsl::sled_id,
&p.current_pagparams(),
)
.filter(dsl::blueprint_id.eq(blueprint_id))
.select(BpSledState::as_select())
.load_async(&*conn)
.await
.map_err(|e| {
public_error_from_diesel(e, ErrorHandler::Server)
})?;

paginator = p.found_batch(&batch, &|s| s.sled_id);

for s in batch {
let old = sled_state
.insert(s.sled_id.into(), s.sled_state.into());
bail_unless!(
old.is_none(),
"found duplicate sled ID in bp_sled_state: {}",
s.sled_id
);
}
}
sled_state
};

// Read this blueprint's `bp_sled_omicron_zones` rows, which describes
// the `OmicronZonesConfig` generation number for each sled that is a
// part of this blueprint. Construct the BTreeMap we ultimately need,
Expand Down Expand Up @@ -550,6 +607,7 @@ impl DataStore {
id: blueprint_id,
blueprint_zones,
blueprint_disks,
sled_state,
parent_blueprint_id,
internal_dns_version,
external_dns_version,
Expand Down Expand Up @@ -578,6 +636,7 @@ impl DataStore {

let (
nblueprints,
nsled_states,
nsled_physical_disks,
nphysical_disks,
nsled_agent_zones,
Expand Down Expand Up @@ -617,6 +676,17 @@ impl DataStore {
));
}

// Remove rows associated with sled states.
let nsled_states = {
use db::schema::bp_sled_state::dsl;
diesel::delete(
dsl::bp_sled_state
.filter(dsl::blueprint_id.eq(blueprint_id)),
)
.execute_async(&conn)
.await?
};

// Remove rows associated with Omicron physical disks
let nsled_physical_disks = {
use db::schema::bp_sled_omicron_physical_disks::dsl;
Expand Down Expand Up @@ -670,6 +740,7 @@ impl DataStore {

Ok((
nblueprints,
nsled_states,
nsled_physical_disks,
nphysical_disks,
nsled_agent_zones,
Expand All @@ -688,6 +759,7 @@ impl DataStore {
info!(&opctx.log, "removed blueprint";
"blueprint_id" => blueprint_id.to_string(),
"nblueprints" => nblueprints,
"nsled_states" => nsled_states,
"nsled_physical_disks" => nsled_physical_disks,
"nphysical_disks" => nphysical_disks,
"nsled_agent_zones" => nsled_agent_zones,
Expand Down Expand Up @@ -1267,7 +1339,6 @@ mod tests {
use nexus_types::external_api::views::PhysicalDiskPolicy;
use nexus_types::external_api::views::PhysicalDiskState;
use nexus_types::external_api::views::SledPolicy;
use nexus_types::external_api::views::SledState;
use nexus_types::inventory::Collection;
use omicron_common::address::Ipv6Subnet;
use omicron_common::disk::DiskIdentity;
Expand Down Expand Up @@ -1482,7 +1553,7 @@ mod tests {
// Check the number of blueprint elements against our collection.
assert_eq!(
blueprint1.blueprint_zones.len(),
planning_input.all_sled_ids(SledFilter::All).count(),
planning_input.all_sled_ids(SledFilter::Commissioned).count(),
);
assert_eq!(
blueprint1.blueprint_zones.len(),
Expand Down
2 changes: 2 additions & 0 deletions nexus/db-queries/src/db/datastore/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ use nexus_db_model::AllSchemaVersions;
pub use probe::ProbeInfo;
pub use rack::RackInit;
pub use silo::Discoverability;
pub use sled::SledTransition;
pub use sled::TransitionError;
pub use switch_port::SwitchPortSettingsCombinedResult;
pub use virtual_provisioning_collection::StorageType;
pub use volume::read_only_resources_associated_with_volume;
Expand Down
12 changes: 12 additions & 0 deletions nexus/db-queries/src/db/datastore/rack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -964,6 +964,7 @@ mod test {
BlueprintZoneDisposition, OmicronZoneExternalSnatIp,
};
use nexus_types::external_api::shared::SiloIdentityMode;
use nexus_types::external_api::views::SledState;
use nexus_types::identity::Asset;
use nexus_types::internal_api::params::DnsRecord;
use nexus_types::inventory::NetworkInterface;
Expand Down Expand Up @@ -996,6 +997,7 @@ mod test {
id: Uuid::new_v4(),
blueprint_zones: BTreeMap::new(),
blueprint_disks: BTreeMap::new(),
sled_state: BTreeMap::new(),
parent_blueprint_id: None,
internal_dns_version: *Generation::new(),
external_dns_version: *Generation::new(),
Expand Down Expand Up @@ -1234,6 +1236,12 @@ mod test {
}
}

/// Builds a sled-state map in which every ID yielded by `sled_ids` is
/// associated with `SledState::Active`.
fn sled_states_active(
    sled_ids: impl Iterator<Item = SledUuid>,
) -> BTreeMap<SledUuid, SledState> {
    let mut states = BTreeMap::new();
    for id in sled_ids {
        // Every sled gets the same value, so a repeated ID simply
        // overwrites its earlier (identical) entry.
        states.insert(id, SledState::Active);
    }
    states
}

#[tokio::test]
async fn rack_set_initialized_with_services() {
let test_name = "rack_set_initialized_with_services";
Expand Down Expand Up @@ -1466,6 +1474,7 @@ mod test {
}
let blueprint = Blueprint {
id: Uuid::new_v4(),
sled_state: sled_states_active(blueprint_zones.keys().copied()),
blueprint_zones,
blueprint_disks: BTreeMap::new(),
parent_blueprint_id: None,
Expand Down Expand Up @@ -1721,6 +1730,7 @@ mod test {
}
let blueprint = Blueprint {
id: Uuid::new_v4(),
sled_state: sled_states_active(blueprint_zones.keys().copied()),
blueprint_zones,
blueprint_disks: BTreeMap::new(),
parent_blueprint_id: None,
Expand Down Expand Up @@ -1932,6 +1942,7 @@ mod test {
}
let blueprint = Blueprint {
id: Uuid::new_v4(),
sled_state: sled_states_active(blueprint_zones.keys().copied()),
blueprint_zones,
blueprint_disks: BTreeMap::new(),
parent_blueprint_id: None,
Expand Down Expand Up @@ -2070,6 +2081,7 @@ mod test {
}
let blueprint = Blueprint {
id: Uuid::new_v4(),
sled_state: sled_states_active(blueprint_zones.keys().copied()),
blueprint_zones,
blueprint_disks: BTreeMap::new(),
parent_blueprint_id: None,
Expand Down
Loading