From f2c63341dfd89e39c8d407fba4232af6d5e9c26a Mon Sep 17 00:00:00 2001 From: alan Date: Fri, 29 Aug 2025 20:00:51 +0000 Subject: [PATCH 01/50] Add debug to test flake step --- .../crucible_replacements.rs | 44 +++++++++++++------ 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index 201a2d78c12..3bbb7295350 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -203,7 +203,7 @@ pub(crate) async fn wait_for_all_replacements( } }, &std::time::Duration::from_millis(50), - &std::time::Duration::from_secs(60), + &std::time::Duration::from_secs(260), ) .await .expect("all replacements finished"); @@ -1659,22 +1659,38 @@ mod region_snapshot_replacement { } pub async fn assert_read_only_target_gone(&self) { - let region_snapshot_replace_request = self - .datastore - .get_region_snapshot_replacement_request_by_id( - &self.opctx(), - self.replacement_request_id, - ) - .await - .unwrap(); + let mut failed = false; + for i in 0..10 { + let region_snapshot_replace_request = self + .datastore + .get_region_snapshot_replacement_request_by_id( + &self.opctx(), + self.replacement_request_id, + ) + .await + .unwrap(); - assert!( - self.datastore + let res = self + .datastore .read_only_target_addr(&region_snapshot_replace_request) .await - .unwrap() - .is_none() - ); + .unwrap(); + + if res.is_none() { + // test pass, move on + if failed == false { + break; + } else { + panic!("Failed rotg {i} times before working"); + } + } + failed = true; + println!("snapshot that should be gone: {:?}", res); + tokio::time::sleep(std::time::Duration::from_secs(2)).await; + } + if failed { + panic!("failed rotg 10 times, never worked"); + } } pub async fn remove_disk_from_snapshot_rop(&self) { From 889ab2d3860a816232b4af67748f7b2de67f95a8 Mon Sep 17 00:00:00 2001 From: alan Date: Fri, 29 Aug 2025
21:02:45 +0000 Subject: [PATCH 02/50] more info when failing --- .../crucible_replacements.rs | 47 ++++++++++--------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index 3bbb7295350..168a4527b38 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -1485,20 +1485,28 @@ mod region_snapshot_replacement { // Assert no volumes are referencing the snapshot address - let volumes = self - .datastore - .find_volumes_referencing_socket_addr( - &self.opctx(), - self.snapshot_socket_addr, - ) - .await - .unwrap(); + let mut failed = false; + for i in 1..10 { + let volumes = self + .datastore + .find_volumes_referencing_socket_addr( + &self.opctx(), + self.snapshot_socket_addr, + ) + .await + .unwrap(); - if !volumes.is_empty() { - eprintln!("{:?}", volumes); + if !volumes.is_empty() { + eprintln!("Volume should be gone, try {i} {:?}", volumes); + failed = true; + tokio::time::sleep(std::time::Duration::from_secs(2)).await; + } else { + break; + } + } + if failed { + panic!("Volume references not cleaned up at first try"); } - - assert!(volumes.is_empty()); } /// Assert no Crucible resources are leaked @@ -1649,10 +1657,11 @@ mod region_snapshot_replacement { match result { InsertStepResult::Inserted { .. 
} => {} - _ => { + x => { assert!( false, - "bad result from create_region_snapshot_replacement_step" + "bad result: {:?} from create_region_snapshot_replacement_step", + x ); } } @@ -1678,18 +1687,14 @@ mod region_snapshot_replacement { if res.is_none() { // test pass, move on - if failed == false { - break; - } else { - panic!("Failed rotg {i} times before working"); - } + break; } failed = true; - println!("snapshot that should be gone: {:?}", res); + eprintln!("loop {i}, snapshot that should be gone: {:?}", res); tokio::time::sleep(std::time::Duration::from_secs(2)).await; } if failed { - panic!("failed rotg 10 times, never worked"); + panic!("failed some number of times checking for target gone"); } } From 93c2bec9a763e5f1bb3e1bba87450adc6e0ff3de Mon Sep 17 00:00:00 2001 From: alan Date: Fri, 29 Aug 2025 21:37:10 +0000 Subject: [PATCH 03/50] more debug logging --- nexus/tests/integration_tests/crucible_replacements.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index 168a4527b38..f8f643e91bc 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -1701,6 +1701,7 @@ mod region_snapshot_replacement { pub async fn remove_disk_from_snapshot_rop(&self) { let disk_url = get_disk_url("disk-from-snapshot"); + eprintln!("Remove disk from snapshot for disk {:?}", disk_url); let disk_from_snapshot: external::Disk = NexusRequest::object_get(&self.client, &disk_url) .authn_as(AuthnMode::PrivilegedUser) @@ -1712,6 +1713,7 @@ mod region_snapshot_replacement { let disk_id = disk_from_snapshot.identity.id; + eprintln!("Remove disk id {:?}", disk_id); // Note: `make_request` needs a type here, otherwise rustc cannot // figure out the type of the `request_body` parameter self.internal_client From d251d45718e89880c6e8c4bbf8772c98f2d6c47e Mon Sep 17 00:00:00 2001 From: alan Date: Fri, 29 
Aug 2025 23:27:47 +0000 Subject: [PATCH 04/50] more debug --- .../region_snapshot_replacement_start.rs | 2 +- .../crucible_replacements.rs | 30 +++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/nexus/src/app/sagas/region_snapshot_replacement_start.rs b/nexus/src/app/sagas/region_snapshot_replacement_start.rs index 25ad4b9f84c..a977d4dd7bf 100644 --- a/nexus/src/app/sagas/region_snapshot_replacement_start.rs +++ b/nexus/src/app/sagas/region_snapshot_replacement_start.rs @@ -1543,7 +1543,7 @@ pub(crate) mod test { assert_eq!( db_request.replacement_state, RegionSnapshotReplacementState::Requested - ); + ); // ZZZ This failed Allocating != Requested assert_eq!(db_request.operating_saga_id, None); } diff --git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index f8f643e91bc..bc86509c790 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -1697,6 +1697,32 @@ mod region_snapshot_replacement { panic!("failed some number of times checking for target gone"); } } + pub async fn pre_assert_read_only_target_gone(&self) { + eprintln!( + "PRE1 replace_request_id: {:?}", + self.replacement_request_id + ); + let region_snapshot_replace_request = self + .datastore + .get_region_snapshot_replacement_request_by_id( + &self.opctx(), + self.replacement_request_id, + ) + .await + .unwrap(); + + eprintln!( + "PRE2 rs_replace_request: {:?}", + region_snapshot_replace_request + ); + let res = self + .datastore + .read_only_target_addr(&region_snapshot_replace_request) + .await + .unwrap(); + + eprintln!("PRE3 target that should be gone: {:?}", res); + } pub async fn remove_disk_from_snapshot_rop(&self) { let disk_url = get_disk_url("disk-from-snapshot"); @@ -1973,9 +1999,13 @@ async fn test_region_snapshot_replacement_step_after_rop_remove_target_gone( test_harness.transition_request_to_replacement_done().await;
test_harness.transition_request_to_running().await; + test_harness.pre_assert_read_only_target_gone().await; test_harness.create_manual_region_snapshot_replacement_step().await; + test_harness.pre_assert_read_only_target_gone().await; test_harness.delete_the_disk().await; + test_harness.pre_assert_read_only_target_gone().await; test_harness.delete_the_snapshot().await; + test_harness.pre_assert_read_only_target_gone().await; // Remove the ROP of the disk created from the snapshot test_harness.remove_disk_from_snapshot_rop().await; From b1b197b2f3742639538ce5de9457984b927bdb5c Mon Sep 17 00:00:00 2001 From: alan Date: Sat, 30 Aug 2025 00:49:53 +0000 Subject: [PATCH 05/50] more debug on more tests --- .../crucible_replacements.rs | 34 ++++++++++++++----- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index bc86509c790..a57d55f69d9 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -1071,24 +1071,33 @@ async fn test_racing_replacements_for_soft_deleted_disk_volume( activate_background_task(&internal_client, "region_replacement_driver") .await; - assert!(match last_background_task.last { + let res = match last_background_task.last { LastResult::Completed(last_result_completed) => { match serde_json::from_value::( last_result_completed.details, ) { Err(e) => { + eprintln!("Json not what we expected"); eprintln!("{e}"); false } - Ok(v) => !v.drive_invoked_ok.is_empty(), + Ok(v) => { + if !v.drive_invoked_ok.is_empty() { + true + } else { + eprintln!("v.drive_ok: {:?}", v.drive_invoked_ok); + false + } + } } } - - _ => { + x => { + eprintln!("Unexpected result here: {:?}", x); false } - }); + }; + assert!(res); // wait for the drive saga to complete here wait_for_condition( @@ -1669,6 +1678,10 @@ mod region_snapshot_replacement { pub async fn 
assert_read_only_target_gone(&self) { let mut failed = false; + eprintln!( + "NOW1 replace_request_id: {:?}", + self.replacement_request_id + ); for i in 0..10 { let region_snapshot_replace_request = self .datastore @@ -1678,6 +1691,10 @@ mod region_snapshot_replacement { ) .await .unwrap(); + eprintln!( + "NOW2 rs_replace_request: {:?}", + region_snapshot_replace_request + ); let res = self .datastore @@ -1685,13 +1702,14 @@ mod region_snapshot_replacement { .await .unwrap(); + eprintln!("NOW3 target that should be gone: {:?}", res); if res.is_none() { // test pass, move on break; } failed = true; eprintln!("loop {i}, snapshot that should be gone: {:?}", res); - tokio::time::sleep(std::time::Duration::from_secs(2)).await; + tokio::time::sleep(std::time::Duration::from_secs(4)).await; } if failed { panic!("failed some number of times checking for target gone"); @@ -1727,7 +1745,7 @@ mod region_snapshot_replacement { pub async fn remove_disk_from_snapshot_rop(&self) { let disk_url = get_disk_url("disk-from-snapshot"); - eprintln!("Remove disk from snapshot for disk {:?}", disk_url); + eprintln!("NOW Remove disk from snapshot for disk {:?}", disk_url); let disk_from_snapshot: external::Disk = NexusRequest::object_get(&self.client, &disk_url) .authn_as(AuthnMode::PrivilegedUser) @@ -1739,7 +1757,7 @@ mod region_snapshot_replacement { let disk_id = disk_from_snapshot.identity.id; - eprintln!("Remove disk id {:?}", disk_id); + eprintln!("NOW Remove disk id {:?}", disk_id); // Note: `make_request` needs a type here, otherwise rustc cannot // figure out the type of the `request_body` parameter self.internal_client From 6f129c406ca6e2bae2cc84d537ab4f251b0a3f0e Mon Sep 17 00:00:00 2001 From: alan Date: Tue, 2 Sep 2025 19:21:21 +0000 Subject: [PATCH 06/50] More debug --- .../crucible_replacements.rs | 33 ++++++++++++++----- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/nexus/tests/integration_tests/crucible_replacements.rs 
b/nexus/tests/integration_tests/crucible_replacements.rs index a57d55f69d9..65fa0dc179a 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -382,6 +382,7 @@ mod region_replacement { pub async fn delete_the_disk(&self) { let disk_url = get_disk_url("disk"); + eprintln!("Delete this disk: {:?}", disk_url); NexusRequest::object_delete(&self.client, &disk_url) .authn_as(AuthnMode::PrivilegedUser) .execute() @@ -403,6 +404,7 @@ mod region_replacement { // Assert the request is in state Complete + eprintln!("Waited for all replacements, including {:?}", self.replacement_request_id); let region_replacement = self .datastore .get_region_replacement_request_by_id( @@ -1663,16 +1665,31 @@ mod region_snapshot_replacement { .await .unwrap(); + // ZZZ: is "AlreadyHandled" an error here? + // Could that be a valid result if some other actor put the + // replacement step into place? + // We get back: + // bad result: AlreadyHandled { existing_step_id: 83e38140-f238-4fed-8cef-58121d507a49 + // } + // Can we dump an existing ID and get more info from it? match result { InsertStepResult::Inserted { .. 
} => {} - x => { - assert!( - false, - "bad result: {:?} from create_region_snapshot_replacement_step", - x - ); - } + InsertStepResult::AlreadyHandled { existing_step_id } => { + let region_snapshot_replace_request = self + .datastore + .get_region_snapshot_replacement_request_by_id( + &self.opctx(), + existing_step_id, + ) + .await + .unwrap(); + eprintln!( + "we were suppose to create this: {:?}", + region_snapshot_replace_request + ); + panic!("Something else created our replacement"); + } } } @@ -1709,7 +1726,7 @@ mod region_snapshot_replacement { } failed = true; eprintln!("loop {i}, snapshot that should be gone: {:?}", res); - tokio::time::sleep(std::time::Duration::from_secs(4)).await; + tokio::time::sleep(std::time::Duration::from_secs(40)).await; } if failed { panic!("failed some number of times checking for target gone"); From b002fb2ef98a7c6e685df66f57eadb75d1e2d937 Mon Sep 17 00:00:00 2001 From: alan Date: Tue, 2 Sep 2025 19:22:15 +0000 Subject: [PATCH 07/50] cargo fmt --- .../crucible_replacements.rs | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index 65fa0dc179a..38f393415e6 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -404,7 +404,10 @@ mod region_replacement { // Assert the request is in state Complete - eprintln!("Waited for all replacements, including {:?}", self.replacement_request_id); + eprintln!( + "Waited for all replacements, including {:?}", + self.replacement_request_id + ); let region_replacement = self .datastore .get_region_replacement_request_by_id( @@ -1675,21 +1678,21 @@ mod region_snapshot_replacement { match result { InsertStepResult::Inserted { .. 
} => {} - InsertStepResult::AlreadyHandled { existing_step_id } => { - let region_snapshot_replace_request = self - .datastore - .get_region_snapshot_replacement_request_by_id( - &self.opctx(), - existing_step_id, - ) - .await - .unwrap(); - eprintln!( - "we were suppose to create this: {:?}", - region_snapshot_replace_request - ); + InsertStepResult::AlreadyHandled { existing_step_id } => { + let region_snapshot_replace_request = self + .datastore + .get_region_snapshot_replacement_request_by_id( + &self.opctx(), + existing_step_id, + ) + .await + .unwrap(); + eprintln!( + "we were suppose to create this: {:?}", + region_snapshot_replace_request + ); panic!("Something else created our replacement"); - } + } } } From b7ba83363a4dcfac33ec424effada44da112bade Mon Sep 17 00:00:00 2001 From: alan Date: Tue, 2 Sep 2025 20:07:26 +0000 Subject: [PATCH 08/50] make some checks loop forever, others take longer --- .../integration_tests/crucible_replacements.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index 38f393415e6..f323b99b722 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -1500,7 +1500,7 @@ mod region_snapshot_replacement { // Assert no volumes are referencing the snapshot address let mut failed = false; - for i in 1..10 { + for i in 1..30 { let volumes = self .datastore .find_volumes_referencing_socket_addr( @@ -1513,7 +1513,7 @@ mod region_snapshot_replacement { if !volumes.is_empty() { eprintln!("Volume should be gone, try {i} {:?}", volumes); failed = true; - tokio::time::sleep(std::time::Duration::from_secs(2)).await; + tokio::time::sleep(std::time::Duration::from_secs(5)).await; } else { break; } @@ -1699,10 +1699,11 @@ mod region_snapshot_replacement { pub async fn assert_read_only_target_gone(&self) { let mut failed = false; 
eprintln!( - "NOW1 replace_request_id: {:?}", + "NOW1 starting, replace_request_id: {:?}", self.replacement_request_id ); - for i in 0..10 { + let mut i = 1; + loop { let region_snapshot_replace_request = self .datastore .get_region_snapshot_replacement_request_by_id( @@ -1729,10 +1730,12 @@ mod region_snapshot_replacement { } failed = true; eprintln!("loop {i}, snapshot that should be gone: {:?}", res); - tokio::time::sleep(std::time::Duration::from_secs(40)).await; + tokio::time::sleep(std::time::Duration::from_secs(5)).await; + i += 1; } + if failed { - panic!("failed some number of times checking for target gone"); + panic!("failed {i} times checking for target gone"); } } pub async fn pre_assert_read_only_target_gone(&self) { From f4157a5fc1d6524942c7b41f0f20f47358fbb73c Mon Sep 17 00:00:00 2001 From: alan Date: Tue, 2 Sep 2025 20:32:14 +0000 Subject: [PATCH 09/50] loop around checking clean slate/ replacement state --- .../region_snapshot_replacement_start.rs | 42 ++++++++++++++----- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/nexus/src/app/sagas/region_snapshot_replacement_start.rs b/nexus/src/app/sagas/region_snapshot_replacement_start.rs index a977d4dd7bf..29f4b70b187 100644 --- a/nexus/src/app/sagas/region_snapshot_replacement_start.rs +++ b/nexus/src/app/sagas/region_snapshot_replacement_start.rs @@ -1534,17 +1534,37 @@ pub(crate) mod test { request: &RegionSnapshotReplacement, ) { let opctx = test_opctx(cptestctx); - let db_request = datastore - .get_region_snapshot_replacement_request_by_id(&opctx, request.id) - .await - .unwrap(); - assert_eq!(db_request.new_region_id, None); - assert_eq!( - db_request.replacement_state, - RegionSnapshotReplacementState::Requested - ); // ZZZ This failed Allocating != Requested - assert_eq!(db_request.operating_saga_id, None); + let mut i = 1; + let mut failed = false; + loop { + let db_request = datastore + .get_region_snapshot_replacement_request_by_id( + &opctx, request.id, + ) + .await + 
.unwrap(); + + assert_eq!(db_request.new_region_id, None); + assert_eq!(db_request.operating_saga_id, None); + + if matches!( + db_request.replacement_state, + RegionSnapshotReplacementState::Requested + ) { + failed = true; + eprintln!( + "loop {i} Failed {:?} != Requested", + db_request.replacement_state + ); + } else { + break; + } + i += 1; + } + if failed { + panic!("Did not find expected state for replacement"); + } } async fn assert_volume_untouched( @@ -1605,6 +1625,7 @@ pub(crate) mod test { .unwrap() .unwrap(); + eprintln!("Before unwind, check clean slate"); verify_clean_slate( &cptestctx, &disk_test, @@ -1613,6 +1634,7 @@ pub(crate) mod test { ) .await; + eprintln!("Now calling unwind, what happens here"); crate::app::sagas::test_helpers::action_failure_can_unwind::< SagaRegionSnapshotReplacementStart, _, From 459fcaa008a90bc446ca55d966720d70a469adde Mon Sep 17 00:00:00 2001 From: alan Date: Tue, 2 Sep 2025 20:43:58 +0000 Subject: [PATCH 10/50] fixed incorrect check on last commit --- nexus/src/app/sagas/region_snapshot_replacement_start.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nexus/src/app/sagas/region_snapshot_replacement_start.rs b/nexus/src/app/sagas/region_snapshot_replacement_start.rs index 29f4b70b187..978d45352aa 100644 --- a/nexus/src/app/sagas/region_snapshot_replacement_start.rs +++ b/nexus/src/app/sagas/region_snapshot_replacement_start.rs @@ -1545,10 +1545,11 @@ pub(crate) mod test { .await .unwrap(); + // Will these also change on unwind? 
assert_eq!(db_request.new_region_id, None); assert_eq!(db_request.operating_saga_id, None); - if matches!( + if !matches!( db_request.replacement_state, RegionSnapshotReplacementState::Requested ) { @@ -1557,6 +1558,7 @@ pub(crate) mod test { "loop {i} Failed {:?} != Requested", db_request.replacement_state ); + tokio::time::sleep(std::time::Duration::from_secs(5)).await; } else { break; } From b82f35da2b8ad3fdd962df9a47ba2ec641c21110 Mon Sep 17 00:00:00 2001 From: alan Date: Tue, 2 Sep 2025 21:04:18 +0000 Subject: [PATCH 11/50] Make loop forever for volume delete check --- nexus/tests/integration_tests/crucible_replacements.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index f323b99b722..e99d2a3a138 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -1499,8 +1499,8 @@ mod region_snapshot_replacement { // Assert no volumes are referencing the snapshot address - let mut failed = false; - for i in 1..30 { + let mut counter = 1; + loop { let volumes = self .datastore .find_volumes_referencing_socket_addr( @@ -1512,15 +1512,12 @@ mod region_snapshot_replacement { if !volumes.is_empty() { eprintln!("Volume should be gone, try {i} {:?}", volumes); - failed = true; tokio::time::sleep(std::time::Duration::from_secs(5)).await; + counter += 1; } else { break; } } - if failed { - panic!("Volume references not cleaned up at first try"); - } } /// Assert no Crucible resources are leaked From 1be06ae4648466a8e4f1ed665e8aa67b471a48c8 Mon Sep 17 00:00:00 2001 From: alan Date: Tue, 2 Sep 2025 21:06:56 +0000 Subject: [PATCH 12/50] fix variable name --- nexus/tests/integration_tests/crucible_replacements.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/tests/integration_tests/crucible_replacements.rs 
b/nexus/tests/integration_tests/crucible_replacements.rs index e99d2a3a138..6252c924aa7 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -1511,7 +1511,7 @@ mod region_snapshot_replacement { .unwrap(); if !volumes.is_empty() { - eprintln!("Volume should be gone, try {i} {:?}", volumes); + eprintln!("Volume should be gone, try {counter} {:?}", volumes); tokio::time::sleep(std::time::Duration::from_secs(5)).await; counter += 1; } else { From 5fa251addf7fdfb09ff17c9abb8aba3156505cbd Mon Sep 17 00:00:00 2001 From: alan Date: Tue, 2 Sep 2025 23:00:37 +0000 Subject: [PATCH 13/50] Make another test loop forever --- nexus/tests/integration_tests/crucible_replacements.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index 6252c924aa7..8c9c6e29a03 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -1694,7 +1694,6 @@ mod region_snapshot_replacement { } pub async fn assert_read_only_target_gone(&self) { - let mut failed = false; eprintln!( "NOW1 starting, replace_request_id: {:?}", self.replacement_request_id @@ -1725,15 +1724,10 @@ mod region_snapshot_replacement { // test pass, move on break; } - failed = true; eprintln!("loop {i}, snapshot that should be gone: {:?}", res); tokio::time::sleep(std::time::Duration::from_secs(5)).await; i += 1; } - - if failed { - panic!("failed {i} times checking for target gone"); - } } pub async fn pre_assert_read_only_target_gone(&self) { eprintln!( From 04051f38e0498d52b2c771c9dbdbd36e9df75248 Mon Sep 17 00:00:00 2001 From: alan Date: Wed, 3 Sep 2025 00:36:20 +0000 Subject: [PATCH 14/50] more debug, correct pre-test checks --- .../crucible_replacements.rs | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git 
a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index 8c9c6e29a03..f733ed5d2c8 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -1076,6 +1076,7 @@ async fn test_racing_replacements_for_soft_deleted_disk_volume( activate_background_task(&internal_client, "region_replacement_driver") .await; + eprintln!("last_background_task {:?}", last_background_task); let res = match last_background_task.last { LastResult::Completed(last_result_completed) => { match serde_json::from_value::( @@ -1089,9 +1090,10 @@ async fn test_racing_replacements_for_soft_deleted_disk_volume( Ok(v) => { if !v.drive_invoked_ok.is_empty() { + eprintln!("v.drive_ok: {:?}", v.drive_invoked_ok); true } else { - eprintln!("v.drive_ok: {:?}", v.drive_invoked_ok); + eprintln!("v.drive_ok: {:?} empty", v.drive_invoked_ok); false } } @@ -1511,7 +1513,10 @@ mod region_snapshot_replacement { .unwrap(); if !volumes.is_empty() { - eprintln!("Volume should be gone, try {counter} {:?}", volumes); + eprintln!( + "Volume should be gone, try {counter} {:?}", + volumes + ); tokio::time::sleep(std::time::Duration::from_secs(5)).await; counter += 1; } else { @@ -1747,13 +1752,18 @@ mod region_snapshot_replacement { "PRE2 rs_replace_request: {:?}", region_snapshot_replace_request ); - let res = self + match self .datastore .read_only_target_addr(&region_snapshot_replace_request) .await - .unwrap(); - - eprintln!("PRE3 target that should be gone: {:?}", res); + { + Ok(res) => { + eprintln!("PRE3 target that will be gone: {:?}", res); + } + Err(e) => { + eprintln!("PRE3 target will be gone is error: {:?}", e); + } + } } pub async fn remove_disk_from_snapshot_rop(&self) { @@ -2031,6 +2041,7 @@ async fn test_region_snapshot_replacement_step_after_rop_remove_target_gone( test_harness.transition_request_to_replacement_done().await;
test_harness.transition_request_to_running().await; + eprintln!("ROP ONE"); test_harness.pre_assert_read_only_target_gone().await; test_harness.create_manual_region_snapshot_replacement_step().await; test_harness.pre_assert_read_only_target_gone().await; From 5c28a2c5013b29fe16b5b8d06fe50e36d0bb1694 Mon Sep 17 00:00:00 2001 From: alan Date: Wed, 3 Sep 2025 01:13:22 +0000 Subject: [PATCH 15/50] print more when test_cooldown_on_sub_reinc fails --- nexus/src/app/background/tasks/instance_reincarnation.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nexus/src/app/background/tasks/instance_reincarnation.rs b/nexus/src/app/background/tasks/instance_reincarnation.rs index 4e9fae3c601..313eba1762d 100644 --- a/nexus/src/app/background/tasks/instance_reincarnation.rs +++ b/nexus/src/app/background/tasks/instance_reincarnation.rs @@ -908,6 +908,8 @@ mod test { // Activate the background task again. Now, only instance 2 should be // restarted. let status = assert_activation_ok!(task.activate(&opctx).await); + eprintln!("status: {:?}", status); + // ZZZ This failed with 2 != 1 assert_eq!(status.total_instances_found(), 1); assert_eq!( status.instances_reincarnated, From fadd5a0c8690248ebae14a023b082e8266186ed9 Mon Sep 17 00:00:00 2001 From: alan Date: Wed, 3 Sep 2025 14:16:41 +0000 Subject: [PATCH 16/50] don't unwrap when debugging possible failures --- nexus/tests/integration_tests/crucible_replacements.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index f733ed5d2c8..cec7ab5d20f 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -1676,7 +1676,9 @@ mod region_snapshot_replacement { // We get back: // bad result: AlreadyHandled { existing_step_id: 83e38140-f238-4fed-8cef-58121d507a49 // } - // Can we dump an existing ID and get more info from it? 
+ // Can we dump an existing ID and get more info from it? Yes, but it's also + // possible it's also done, and the request ID is not found. + // We unwrap with: internal_message: "unexpected database error: Record not found" match result { InsertStepResult::Inserted { .. } => {} @@ -1687,10 +1689,10 @@ mod region_snapshot_replacement { &self.opctx(), existing_step_id, ) - .await - .unwrap(); + .await; eprintln!( - "we were suppose to create this: {:?}", + "we were suppose to create this: {:?} but got {:?}", + self.replacement_request_id, region_snapshot_replace_request ); panic!("Something else created our replacement"); From 967a0d443a61d8df132d486c36569d16ae87ec38 Mon Sep 17 00:00:00 2001 From: alan Date: Wed, 3 Sep 2025 17:57:39 +0000 Subject: [PATCH 17/50] debug for test_quiesce_full --- nexus/src/app/quiesce.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/nexus/src/app/quiesce.rs b/nexus/src/app/quiesce.rs index 6c8fe05decd..c8ac5c7d0b1 100644 --- a/nexus/src/app/quiesce.rs +++ b/nexus/src/app/quiesce.rs @@ -158,6 +158,8 @@ mod test { after: DateTime, status: QuiesceStatus, ) { + // ZZZ debug app::quiesce::test::test_quiesce_full + eprintln!("status: {:?}", status); let QuiesceState::Quiesced { time_requested, time_quiesced, @@ -179,6 +181,11 @@ mod test { assert!(time_quiesced >= time_requested); assert!(duration_total >= duration_waiting_for_sagas); assert!(duration_total >= duration_waiting_for_db); + eprintln!( + "dt: {:?} <= {after} - {before} which evaluates to: {:?}", + duration_total, + (after - before).to_std().unwrap() + ); assert!(duration_total <= (after - before).to_std().unwrap()); assert!(status.sagas_pending.is_empty()); assert!(status.db_claims.is_empty()); From deb35db2f4a907a87f0e8bfa0bd3dcfd0c74a140 Mon Sep 17 00:00:00 2001 From: alan Date: Wed, 3 Sep 2025 18:24:43 +0000 Subject: [PATCH 18/50] Increase wait time for instance changes to 320 --- nexus/tests/integration_tests/instances.rs | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index f469c8959fd..647b5ad6f55 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -6890,7 +6890,7 @@ pub async fn instance_wait_for_state_as( instance_id: InstanceUuid, state: omicron_common::api::external::InstanceState, ) -> Instance { - const MAX_WAIT: Duration = Duration::from_secs(120); + const MAX_WAIT: Duration = Duration::from_secs(320); // 120 was not enough slog::info!( &client.client_log, From 0478bbdccb0bbf0c01b81715eb29343bafacc809 Mon Sep 17 00:00:00 2001 From: alan Date: Wed, 3 Sep 2025 21:07:22 +0000 Subject: [PATCH 19/50] more debug, but this ones a problem --- .../integration_tests/crucible_replacements.rs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index cec7ab5d20f..75f7ee5a239 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -1519,6 +1519,11 @@ mod region_snapshot_replacement { ); tokio::time::sleep(std::time::Duration::from_secs(5)).await; counter += 1; + if counter > 200 { + panic!( + "Tried 200 times, and still this did not finish" + ); + } } else { break; } @@ -1660,6 +1665,16 @@ mod region_snapshot_replacement { .await .unwrap(); + /* + let pre_result = self + .datastore + .lookup_region_snapshot_replacement_request( + &self.opctx(), + RegionSnapshot,, + ) + .await + */ + let result = self .datastore .create_region_snapshot_replacement_step( @@ -1670,6 +1685,7 @@ mod region_snapshot_replacement { .await .unwrap(); + eprintln!("result: {:?}", result); // ZZZ: is "AlreadyHandled" an error here? // Could that be a valid result if some other actor put the // replacement step into place? 
@@ -1691,7 +1707,7 @@ mod region_snapshot_replacement { ) .await; eprintln!( - "we were suppose to create this: {:?} but got {:?}", + "we were suppose to create this: {:?} but found it AlreadyHandled, then got {:?}", self.replacement_request_id, region_snapshot_replace_request ); From 146023c3aa1645733782554cdef99401af53a696 Mon Sep 17 00:00:00 2001 From: alan Date: Thu, 4 Sep 2025 17:26:49 +0000 Subject: [PATCH 20/50] Give up eventually on assert_region_snapshot_replacement_request_untouched --- nexus/src/app/sagas/region_snapshot_replacement_start.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/nexus/src/app/sagas/region_snapshot_replacement_start.rs b/nexus/src/app/sagas/region_snapshot_replacement_start.rs index 978d45352aa..fb84606d74a 100644 --- a/nexus/src/app/sagas/region_snapshot_replacement_start.rs +++ b/nexus/src/app/sagas/region_snapshot_replacement_start.rs @@ -1536,7 +1536,6 @@ pub(crate) mod test { let opctx = test_opctx(cptestctx); let mut i = 1; - let mut failed = false; loop { let db_request = datastore .get_region_snapshot_replacement_request_by_id( @@ -1553,20 +1552,20 @@ pub(crate) mod test { db_request.replacement_state, RegionSnapshotReplacementState::Requested ) { - failed = true; eprintln!( "loop {i} Failed {:?} != Requested", db_request.replacement_state ); + // 200 * 5 = 1000 seconds, at this point something is wrong. 
+ if i > 200 { + panic!("Failed to reach requested state after {i} tries"); + } tokio::time::sleep(std::time::Duration::from_secs(5)).await; } else { break; } i += 1; } - if failed { - panic!("Did not find expected state for replacement"); - } } async fn assert_volume_untouched( From f5e7e7a6f27466c7a345ce98e7d78680841fc949 Mon Sep 17 00:00:00 2001 From: alan Date: Thu, 4 Sep 2025 19:17:32 +0000 Subject: [PATCH 21/50] Add a comment about a possible flake point --- sled-agent/src/sim/http_entrypoints_pantry.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sled-agent/src/sim/http_entrypoints_pantry.rs b/sled-agent/src/sim/http_entrypoints_pantry.rs index 672f36d1207..ff22057f2de 100644 --- a/sled-agent/src/sim/http_entrypoints_pantry.rs +++ b/sled-agent/src/sim/http_entrypoints_pantry.rs @@ -399,6 +399,9 @@ mod tests { let raw_url = format!( "https://raw.githubusercontent.com/oxidecomputer/crucible/{part}/openapi/crucible-pantry.json", ); + + // If this fails, then whatever test needed it fails too. + // Should we retry this? 
ZZZ let raw_json = reqwest::blocking::get(&raw_url).unwrap().text().unwrap(); serde_json::from_str(&raw_json).unwrap() From 0cff9d8db6a35e4c1572fed177a38314d2161170 Mon Sep 17 00:00:00 2001 From: alan Date: Thu, 4 Sep 2025 19:38:23 +0000 Subject: [PATCH 22/50] Increase timeout for crucible replacements --- nexus/tests/integration_tests/crucible_replacements.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index 75f7ee5a239..0b3988e2c04 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -486,7 +486,7 @@ mod region_replacement { } }, &std::time::Duration::from_millis(50), - &std::time::Duration::from_secs(60), + &std::time::Duration::from_secs(260), // 60 was not enough ZZZ ) .await .expect("request transitioned to expected state"); From 1d2b8bab51519cec6fa17e59d2ff96907c8ba36c Mon Sep 17 00:00:00 2001 From: alan Date: Fri, 5 Sep 2025 00:16:22 +0000 Subject: [PATCH 23/50] reduce test threads --- .github/buildomat/build-and-test.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/buildomat/build-and-test.sh b/.github/buildomat/build-and-test.sh index 6cf7f578fb1..9e8a40415ae 100755 --- a/.github/buildomat/build-and-test.sh +++ b/.github/buildomat/build-and-test.sh @@ -130,7 +130,8 @@ ptime -m cargo build -Z unstable-options --timings=json \ # rather than a buildomat timeout. See oxidecomputer/buildomat#8. 
# banner test -ptime -m timeout 2h cargo nextest run --profile ci --locked --verbose +ptime -m timeout 2h cargo nextest run --profile ci --locked --verbose \ + --test-threads -2 # # https://github.com/nextest-rs/nextest/issues/16 From ad9163cc4cf52681322db24d613729224cdf6c74 Mon Sep 17 00:00:00 2001 From: alan Date: Fri, 5 Sep 2025 00:19:28 +0000 Subject: [PATCH 24/50] Bump timer for inventory collection --- nexus/test-utils/src/resource_helpers.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index bb895d61090..51921009888 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -1685,7 +1685,7 @@ impl<'a, N: NexusServer> DiskTest<'a, N> { } }, &Duration::from_millis(50), - &Duration::from_secs(30), + &Duration::from_secs(120), ) .await .expect("expected to find inventory collection"); From a9f74503feb3cf7f2a907a14a132ba1f4964b4bb Mon Sep 17 00:00:00 2001 From: alan Date: Fri, 5 Sep 2025 00:44:03 +0000 Subject: [PATCH 25/50] Increase timeout for pantry_endpoints to 120 seconds --- sled-agent/src/sim/http_entrypoints_pantry.rs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/sled-agent/src/sim/http_entrypoints_pantry.rs b/sled-agent/src/sim/http_entrypoints_pantry.rs index ff22057f2de..c10795a923a 100644 --- a/sled-agent/src/sim/http_entrypoints_pantry.rs +++ b/sled-agent/src/sim/http_entrypoints_pantry.rs @@ -400,10 +400,17 @@ mod tests { "https://raw.githubusercontent.com/oxidecomputer/crucible/{part}/openapi/crucible-pantry.json", ); - // If this fails, then whatever test needed it fails too. - // Should we retry this? ZZZ - let raw_json = - reqwest::blocking::get(&raw_url).unwrap().text().unwrap(); + // The default timeout of 30 seconds was sometimes failing under + // heavy load. 
+ let raw_json = reqwest::blocking::Client::builder() + .timeout(std::time::Duration::from_secs(120)) + .build() + .unwrap() + .get(&raw_url) + .send() + .unwrap() + .text() + .unwrap(); serde_json::from_str(&raw_json).unwrap() } From e9e8c0327d3b14313de8b3a0a1d86dc9a36993d5 Mon Sep 17 00:00:00 2001 From: alan Date: Fri, 5 Sep 2025 17:06:56 +0000 Subject: [PATCH 26/50] Cleanup test flake comments --- nexus/src/app/background/tasks/instance_reincarnation.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nexus/src/app/background/tasks/instance_reincarnation.rs b/nexus/src/app/background/tasks/instance_reincarnation.rs index 313eba1762d..c30868bb1da 100644 --- a/nexus/src/app/background/tasks/instance_reincarnation.rs +++ b/nexus/src/app/background/tasks/instance_reincarnation.rs @@ -906,10 +906,10 @@ mod test { .await; // Activate the background task again. Now, only instance 2 should be - // restarted. + // restarted. Possible test flake here and this adds a bit more debug + // if we see this assertion fail. 
let status = assert_activation_ok!(task.activate(&opctx).await); eprintln!("status: {:?}", status); - // ZZZ This failed with 2 != 1 assert_eq!(status.total_instances_found(), 1); assert_eq!( status.instances_reincarnated, From 46e621d4e9f142ee682753c5a6705f624c6999ca Mon Sep 17 00:00:00 2001 From: alan Date: Fri, 5 Sep 2025 19:15:26 +0000 Subject: [PATCH 27/50] Remove debugging statements --- nexus/src/app/quiesce.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/nexus/src/app/quiesce.rs b/nexus/src/app/quiesce.rs index 1cffe406f6e..4b8202ec9fa 100644 --- a/nexus/src/app/quiesce.rs +++ b/nexus/src/app/quiesce.rs @@ -258,8 +258,6 @@ mod test { after: DateTime, status: QuiesceStatus, ) { - // ZZZ debug app::quiesce::test::test_quiesce_full - eprintln!("status: {:?}", status); let QuiesceState::Quiesced { time_requested, time_quiesced, From 5c43b98fcfa5a1081dbeb25474ed663f59f2e10f Mon Sep 17 00:00:00 2001 From: alan Date: Fri, 5 Sep 2025 21:31:52 +0000 Subject: [PATCH 28/50] cleanup more comments --- nexus/tests/integration_tests/crucible_replacements.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index 0b3988e2c04..d21a76716d7 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -486,7 +486,7 @@ mod region_replacement { } }, &std::time::Duration::from_millis(50), - &std::time::Duration::from_secs(260), // 60 was not enough ZZZ + &std::time::Duration::from_secs(260), // 60 was not enough ) .await .expect("request transitioned to expected state"); From f226c523330dc7bf8f00c799298d3316e5acfb28 Mon Sep 17 00:00:00 2001 From: alan Date: Fri, 5 Sep 2025 23:57:32 +0000 Subject: [PATCH 29/50] Remove more debug messages --- nexus/src/app/sagas/region_snapshot_replacement_start.rs | 2 -- 1 file changed, 2 deletions(-) diff --git 
a/nexus/src/app/sagas/region_snapshot_replacement_start.rs b/nexus/src/app/sagas/region_snapshot_replacement_start.rs index fb84606d74a..d904f096b13 100644 --- a/nexus/src/app/sagas/region_snapshot_replacement_start.rs +++ b/nexus/src/app/sagas/region_snapshot_replacement_start.rs @@ -1626,7 +1626,6 @@ pub(crate) mod test { .unwrap() .unwrap(); - eprintln!("Before unwind, check clean slate"); verify_clean_slate( &cptestctx, &disk_test, @@ -1635,7 +1634,6 @@ pub(crate) mod test { ) .await; - eprintln!("Now calling unwind, what happens here"); crate::app::sagas::test_helpers::action_failure_can_unwind::< SagaRegionSnapshotReplacementStart, _, From ff713377ade98f724099d95d4fa75714e7d582bc Mon Sep 17 00:00:00 2001 From: alan Date: Sat, 6 Sep 2025 06:00:41 +0000 Subject: [PATCH 30/50] comment cleanup --- nexus/src/app/sagas/region_snapshot_replacement_start.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/nexus/src/app/sagas/region_snapshot_replacement_start.rs b/nexus/src/app/sagas/region_snapshot_replacement_start.rs index d904f096b13..88544bb26b6 100644 --- a/nexus/src/app/sagas/region_snapshot_replacement_start.rs +++ b/nexus/src/app/sagas/region_snapshot_replacement_start.rs @@ -1544,7 +1544,6 @@ pub(crate) mod test { .await .unwrap(); - // Will these also change on unwind? 
assert_eq!(db_request.new_region_id, None); assert_eq!(db_request.operating_saga_id, None); From 9d5d02e45c78354c44d56123af9cbe654d4c36db Mon Sep 17 00:00:00 2001 From: alan Date: Sat, 6 Sep 2025 15:30:54 +0000 Subject: [PATCH 31/50] More comment changes --- .github/buildomat/build-and-test.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/buildomat/build-and-test.sh b/.github/buildomat/build-and-test.sh index 9e8a40415ae..0ae5c0ddc84 100755 --- a/.github/buildomat/build-and-test.sh +++ b/.github/buildomat/build-and-test.sh @@ -128,7 +128,10 @@ ptime -m cargo build -Z unstable-options --timings=json \ # # We apply our own timeout to ensure that we get a normal failure on timeout # rather than a buildomat timeout. See oxidecomputer/buildomat#8. -# +# To avoid too many tests running at the same time, we choose a test threads +# 2 less (negative 2) than the default. This avoids many test flakes where +# the test would have worked but the system was too overloaded and tests +# take longer than their default timeouts. 
banner test ptime -m timeout 2h cargo nextest run --profile ci --locked --verbose \ --test-threads -2 From e45b4e56063ac34c6746abc075783c305a82c212 Mon Sep 17 00:00:00 2001 From: alan Date: Sat, 6 Sep 2025 19:25:05 +0000 Subject: [PATCH 32/50] Comment for quiesce eprintln --- nexus/src/app/quiesce.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/nexus/src/app/quiesce.rs b/nexus/src/app/quiesce.rs index 4b8202ec9fa..762345a9384 100644 --- a/nexus/src/app/quiesce.rs +++ b/nexus/src/app/quiesce.rs @@ -282,6 +282,7 @@ mod test { assert!(duration_total >= duration_draining_sagas); assert!(duration_total >= duration_draining_db); assert!(duration_total >= duration_recording_quiesce); + // Add additional debug information trying to find a test flake eprintln!( "dt: {:?} <= {after} - {before} which evaluates to: {:?}", duration_total, From 5e83e94feb77b1e998a7ea5f154d86fdef91f7a2 Mon Sep 17 00:00:00 2001 From: alan Date: Sat, 6 Sep 2025 21:21:47 +0000 Subject: [PATCH 33/50] Remove more debug messages --- nexus/tests/integration_tests/crucible_replacements.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index d21a76716d7..bf9eb123971 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -382,7 +382,6 @@ mod region_replacement { pub async fn delete_the_disk(&self) { let disk_url = get_disk_url("disk"); - eprintln!("Delete this disk: {:?}", disk_url); NexusRequest::object_delete(&self.client, &disk_url) .authn_as(AuthnMode::PrivilegedUser) .execute() From 35aa542dcf10d85fb237a46355e26654f9299741 Mon Sep 17 00:00:00 2001 From: Nils Nieuwejaar Date: Fri, 5 Sep 2025 20:59:32 -0400 Subject: [PATCH 34/50] Update dendrite, lldpd, maghemite, and sidecar-lite to get ipv6 ecmp (#8987) --- Cargo.lock | 235 ++++++++++++++----- Cargo.toml | 8 +- dev-tools/ls-apis/tests/api_dependencies.out | 4 
+- dev-tools/xtask/src/virtual_hardware.rs | 2 +- package-manifest.toml | 28 +-- tools/dendrite_stub_checksums | 6 +- tools/dendrite_version | 2 +- tools/maghemite_ddm_openapi_version | 2 +- tools/maghemite_mg_openapi_version | 4 +- tools/maghemite_mgd_checksums | 4 +- workspace-hack/Cargo.toml | 6 +- 11 files changed, 208 insertions(+), 93 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cf19e09a225..9eee8a2e185 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1657,13 +1657,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" dependencies = [ "lazy_static", - "windows-sys 0.59.0", + "windows-sys 0.48.0", ] [[package]] name = "common" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/dendrite?rev=8314881e372d7bbb4a4ee2da051ecdc34f66c534#8314881e372d7bbb4a4ee2da051ecdc34f66c534" +source = "git+https://github.com/oxidecomputer/dendrite?branch=main#72461d3a6e4724fd33454836d3c9d93c393fd4e4" dependencies = [ "anyhow", "chrono", @@ -1685,13 +1685,13 @@ dependencies = [ [[package]] name = "common" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/dendrite#0f203b052d7a94058fed1f4006fa7116f51bfee4" +source = "git+https://github.com/oxidecomputer/dendrite?rev=738c80d18d5e94eda367440ade7743e9d9f124de#738c80d18d5e94eda367440ade7743e9d9f124de" dependencies = [ "anyhow", "chrono", "oximeter 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", "oxnet", - "rand 0.8.5", + "rand 0.9.2", "schemars", "serde", "serde_json", @@ -1704,23 +1704,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "common" -version = "0.1.0" -source = "git+https://github.com/oxidecomputer/lldp#82fbc8c9747eb9f74dde0f92ae77ec67f65652c4" -dependencies = [ - "anyhow", - "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite)", - "schemars", - "serde", - "serde_json", - "slog", - "slog-async", - "slog-bunyan", - "slog-term", - "thiserror 1.0.69", -] - 
[[package]] name = "compact_str" version = "0.8.0" @@ -2403,7 +2386,7 @@ dependencies = [ [[package]] name = "ddm-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=fa5f15cdcd5864161a929e2ec01534f70dfba216#fa5f15cdcd5864161a929e2ec01534f70dfba216" +source = "git+https://github.com/oxidecomputer/maghemite?rev=e5f53eacc5ab1015c4cd7298912657521a6b8351#e5f53eacc5ab1015c4cd7298912657521a6b8351" dependencies = [ "oxnet", "percent-encoding", @@ -2873,11 +2856,11 @@ dependencies = [ [[package]] name = "dpd-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/dendrite?rev=8314881e372d7bbb4a4ee2da051ecdc34f66c534#8314881e372d7bbb4a4ee2da051ecdc34f66c534" +source = "git+https://github.com/oxidecomputer/dendrite?branch=main#72461d3a6e4724fd33454836d3c9d93c393fd4e4" dependencies = [ "async-trait", "chrono", - "common 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=8314881e372d7bbb4a4ee2da051ecdc34f66c534)", + "common 0.1.0 (git+https://github.com/oxidecomputer/dendrite?branch=main)", "crc8", "futures", "http", @@ -2897,16 +2880,16 @@ dependencies = [ [[package]] name = "dpd-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/dendrite#0f203b052d7a94058fed1f4006fa7116f51bfee4" +source = "git+https://github.com/oxidecomputer/dendrite?rev=738c80d18d5e94eda367440ade7743e9d9f124de#738c80d18d5e94eda367440ade7743e9d9f124de" dependencies = [ "async-trait", "chrono", - "common 0.1.0 (git+https://github.com/oxidecomputer/dendrite)", + "common 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=738c80d18d5e94eda367440ade7743e9d9f124de)", "crc8", "futures", "http", "oxnet", - "progenitor 0.9.1", + "progenitor 0.11.0", "regress", "reqwest", "schemars", @@ -5658,7 +5641,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" dependencies = [ "cfg-if", - "windows-targets 0.52.6", + "windows-targets 
0.48.5", ] [[package]] @@ -5840,11 +5823,11 @@ dependencies = [ [[package]] name = "lldpd-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/lldp#82fbc8c9747eb9f74dde0f92ae77ec67f65652c4" +source = "git+https://github.com/oxidecomputer/lldp#53f4254b1ce7f13e23fdb54180015760b5f44d55" dependencies = [ "chrono", - "common 0.1.0 (git+https://github.com/oxidecomputer/lldp)", "futures", + "lldpd-common", "progenitor 0.9.1", "protocol", "reqwest", @@ -5855,6 +5838,23 @@ dependencies = [ "uuid", ] +[[package]] +name = "lldpd-common" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/lldp#53f4254b1ce7f13e23fdb54180015760b5f44d55" +dependencies = [ + "anyhow", + "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?branch=main)", + "schemars", + "serde", + "serde_json", + "slog", + "slog-async", + "slog-bunyan", + "slog-term", + "thiserror 1.0.69", +] + [[package]] name = "lock_api" version = "0.4.12" @@ -6062,7 +6062,7 @@ dependencies = [ [[package]] name = "mg-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=fa5f15cdcd5864161a929e2ec01534f70dfba216#fa5f15cdcd5864161a929e2ec01534f70dfba216" +source = "git+https://github.com/oxidecomputer/maghemite?rev=e5f53eacc5ab1015c4cd7298912657521a6b8351#e5f53eacc5ab1015c4cd7298912657521a6b8351" dependencies = [ "anyhow", "chrono", @@ -7878,7 +7878,7 @@ dependencies = [ "display-error-chain", "dns-server", "dns-service-client", - "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=8314881e372d7bbb4a4ee2da051ecdc34f66c534)", + "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=738c80d18d5e94eda367440ade7743e9d9f124de)", "dropshot", "ereport-types", "expectorate", @@ -8346,7 +8346,7 @@ dependencies = [ "display-error-chain", "dns-server", "dns-service-client", - "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=8314881e372d7bbb4a4ee2da051ecdc34f66c534)", + "dpd-client 0.1.0 
(git+https://github.com/oxidecomputer/dendrite?rev=738c80d18d5e94eda367440ade7743e9d9f124de)", "dropshot", "expectorate", "flate2", @@ -8551,6 +8551,7 @@ dependencies = [ "getrandom 0.3.1", "group", "hashbrown 0.15.4", + "heck 0.4.1", "hickory-proto 0.25.2", "hmac", "hyper", @@ -8991,7 +8992,7 @@ dependencies = [ [[package]] name = "oximeter" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#e7579c3641181202982f72fe1d1fa25d26183412" +source = "git+https://github.com/oxidecomputer/omicron?branch=main#71f196548bd2a8acbcd649d10fac2f2dca2f0cd0" dependencies = [ "anyhow", "chrono", @@ -9191,7 +9192,7 @@ dependencies = [ [[package]] name = "oximeter-macro-impl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#e7579c3641181202982f72fe1d1fa25d26183412" +source = "git+https://github.com/oxidecomputer/omicron?branch=main#71f196548bd2a8acbcd649d10fac2f2dca2f0cd0" dependencies = [ "omicron-workspace-hack", "proc-macro2", @@ -9248,7 +9249,7 @@ dependencies = [ [[package]] name = "oximeter-schema" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#e7579c3641181202982f72fe1d1fa25d26183412" +source = "git+https://github.com/oxidecomputer/omicron?branch=main#71f196548bd2a8acbcd649d10fac2f2dca2f0cd0" dependencies = [ "anyhow", "chrono", @@ -9297,7 +9298,7 @@ dependencies = [ [[package]] name = "oximeter-timeseries-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#e7579c3641181202982f72fe1d1fa25d26183412" +source = "git+https://github.com/oxidecomputer/omicron?branch=main#71f196548bd2a8acbcd649d10fac2f2dca2f0cd0" dependencies = [ "omicron-workspace-hack", "oximeter-schema 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", @@ -9337,7 +9338,7 @@ dependencies = [ [[package]] name = "oximeter-types" version = "0.1.0" -source = 
"git+https://github.com/oxidecomputer/omicron?branch=main#e7579c3641181202982f72fe1d1fa25d26183412" +source = "git+https://github.com/oxidecomputer/omicron?branch=main#71f196548bd2a8acbcd649d10fac2f2dca2f0cd0" dependencies = [ "bytes", "chrono", @@ -9349,8 +9350,8 @@ dependencies = [ "regex", "schemars", "serde", - "strum 0.26.3", - "thiserror 1.0.69", + "strum 0.27.2", + "thiserror 2.0.16", "uuid", ] @@ -9478,6 +9479,17 @@ dependencies = [ "unicode-width 0.2.0", ] +[[package]] +name = "papergrid" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6978128c8b51d8f4080631ceb2302ab51e32cc6e8615f735ee2f83fd269ae3f1" +dependencies = [ + "bytecount", + "fnv", + "unicode-width 0.2.0", +] + [[package]] name = "parallel-task-set" version = "0.1.0" @@ -10240,9 +10252,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.95" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" dependencies = [ "unicode-ident", ] @@ -10280,6 +10292,17 @@ dependencies = [ "progenitor-macro 0.10.0", ] +[[package]] +name = "progenitor" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7b99ef43fdd69d70aa4df8869db24b10ac704a2dbbc387ffac51944a1f3c0a8" +dependencies = [ + "progenitor-client 0.11.0", + "progenitor-impl 0.11.0", + "progenitor-macro 0.11.0", +] + [[package]] name = "progenitor-client" version = "0.8.0" @@ -10325,6 +10348,21 @@ dependencies = [ "serde_urlencoded", ] +[[package]] +name = "progenitor-client" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3832a961a5f1b0b5a5ccda5fbf67cae2ba708f6add667401007764ba504ffebf" +dependencies = [ + "bytes", + "futures-core", + "percent-encoding", + "reqwest", + "serde", + "serde_json", + 
"serde_urlencoded", +] + [[package]] name = "progenitor-impl" version = "0.8.0" @@ -10387,7 +10425,29 @@ dependencies = [ "serde_json", "syn 2.0.106", "thiserror 2.0.16", - "typify 0.4.1", + "typify 0.4.3", + "unicode-ident", +] + +[[package]] +name = "progenitor-impl" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7646201b823e61712dd72f37428ceecaa8fb2a6c841e5d7cf909edb9a17f5677" +dependencies = [ + "heck 0.5.0", + "http", + "indexmap 2.11.0", + "openapiv3", + "proc-macro2", + "quote", + "regex", + "schemars", + "serde", + "serde_json", + "syn 2.0.106", + "thiserror 2.0.16", + "typify 0.4.3", "unicode-ident", ] @@ -10445,6 +10505,24 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "progenitor-macro" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e710a11140d9b4241b7d8a90748f6125b6796d7a1205238eddb08dc790ce3830" +dependencies = [ + "openapiv3", + "proc-macro2", + "progenitor-impl 0.11.0", + "quote", + "schemars", + "serde", + "serde_json", + "serde_tokenstream", + "serde_yaml", + "syn 2.0.106", +] + [[package]] name = "propolis-client" version = "0.1.0" @@ -10579,7 +10657,7 @@ dependencies = [ [[package]] name = "protocol" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/lldp#82fbc8c9747eb9f74dde0f92ae77ec67f65652c4" +source = "git+https://github.com/oxidecomputer/lldp#53f4254b1ce7f13e23fdb54180015760b5f44d55" dependencies = [ "anyhow", "schemars", @@ -12782,7 +12860,7 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" dependencies = [ - "heck 0.5.0", + "heck 0.4.1", "proc-macro2", "quote", "syn 2.0.106", @@ -13267,6 +13345,17 @@ dependencies = [ "tabled_derive 0.10.0", ] +[[package]] +name = "tabled" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e39a2ee1fbcd360805a771e1b300f78cc88fec7b8d3e2f71cd37bbf23e725c7d" +dependencies = [ + "papergrid 0.17.0", + "tabled_derive 0.11.0", + "testing_table", +] + [[package]] name = "tabled_derive" version = "0.7.0" @@ -13293,6 +13382,19 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "tabled_derive" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ea5d1b13ca6cff1f9231ffd62f15eefd72543dab5e468735f1a456728a02846" +dependencies = [ + "heck 0.5.0", + "proc-macro-error2", + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "tabwriter" version = "1.4.0" @@ -13434,6 +13536,15 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "testing_table" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f8daae29995a24f65619e19d8d31dea5b389f3d853d8bf297bbf607cd0014cc" +dependencies = [ + "unicode-width 0.2.0", +] + [[package]] name = "textwrap" version = "0.16.2" @@ -14104,19 +14215,19 @@ dependencies = [ [[package]] name = "transceiver-controller" version = "0.1.1" -source = "git+https://github.com/oxidecomputer/transceiver-control?branch=main#f3cb309c2bd2c03423467fd93992e9033ae3133c" +source = "git+https://github.com/oxidecomputer/transceiver-control?branch=main#a72631ed6aa46e856fdc6944740a92999f30f997" dependencies = [ "anyhow", "clap", "hubpack", "itertools 0.14.0", - "nix 0.29.0", + "nix 0.30.1", "schemars", "serde", "slog", "slog-async", "slog-term", - "tabled 0.18.0", + "tabled 0.20.0", "thiserror 2.0.16", "tokio", "transceiver-decode 0.1.0 (git+https://github.com/oxidecomputer/transceiver-control?branch=main)", @@ -14152,7 +14263,7 @@ dependencies = [ [[package]] name = "transceiver-decode" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/transceiver-control?branch=main#f3cb309c2bd2c03423467fd93992e9033ae3133c" +source = "git+https://github.com/oxidecomputer/transceiver-control?branch=main#a72631ed6aa46e856fdc6944740a92999f30f997" 
dependencies = [ "schemars", "serde", @@ -14176,7 +14287,7 @@ dependencies = [ [[package]] name = "transceiver-messages" version = "0.1.1" -source = "git+https://github.com/oxidecomputer/transceiver-control?branch=main#f3cb309c2bd2c03423467fd93992e9033ae3133c" +source = "git+https://github.com/oxidecomputer/transceiver-control?branch=main#a72631ed6aa46e856fdc6944740a92999f30f997" dependencies = [ "bitflags 2.9.1", "clap", @@ -14476,12 +14587,12 @@ dependencies = [ [[package]] name = "typify" -version = "0.4.1" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcc5bec3cdff70fd542e579aa2e52967833e543a25fae0d14579043d2e868a50" +checksum = "7144144e97e987c94758a3017c920a027feac0799df325d6df4fc8f08d02068e" dependencies = [ - "typify-impl 0.4.1", - "typify-macro 0.4.1", + "typify-impl 0.4.3", + "typify-macro 0.4.3", ] [[package]] @@ -14526,9 +14637,9 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.4.1" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b52a67305054e1da6f3d99ad94875dcd0c7c49adbd17b4b64f0eefb7ae5bf8ab" +checksum = "062879d46aa4c9dfe0d33b035bbaf512da192131645d05deacb7033ec8581a09" dependencies = [ "heck 0.5.0", "log", @@ -14580,9 +14691,9 @@ dependencies = [ [[package]] name = "typify-macro" -version = "0.4.1" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ff5799be156e4f635c348c6051d165e1c59997827155133351a8c4d333d9841" +checksum = "9708a3ceb6660ba3f8d2b8f0567e7d4b8b198e2b94d093b8a6077a751425de9e" dependencies = [ "proc-macro2", "quote", @@ -14592,7 +14703,7 @@ dependencies = [ "serde_json", "serde_tokenstream", "syn 2.0.106", - "typify-impl 0.4.1", + "typify-impl 0.4.3", ] [[package]] @@ -15256,7 +15367,7 @@ name = "wicket-common" version = "0.1.0" dependencies = [ "anyhow", - "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=8314881e372d7bbb4a4ee2da051ecdc34f66c534)", + 
"dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=738c80d18d5e94eda367440ade7743e9d9f124de)", "dropshot", "gateway-client", "gateway-types", @@ -15316,7 +15427,7 @@ dependencies = [ "clap", "debug-ignore", "display-error-chain", - "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=8314881e372d7bbb4a4ee2da051ecdc34f66c534)", + "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=738c80d18d5e94eda367440ade7743e9d9f124de)", "dropshot", "either", "expectorate", @@ -15461,7 +15572,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.48.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 2ae983777bc..36527a0183e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -432,8 +432,8 @@ digest = "0.10.7" dns-server = { path = "dns-server" } dns-server-api = { path = "dns-server-api" } dns-service-client = { path = "clients/dns-service-client" } -dpd-client = { git = "https://github.com/oxidecomputer/dendrite", rev = "8314881e372d7bbb4a4ee2da051ecdc34f66c534" } -dropshot = { version = "0.16.4", features = [ "usdt-probes" ] } +dpd-client = { git = "https://github.com/oxidecomputer/dendrite", rev = "738c80d18d5e94eda367440ade7743e9d9f124de" } +dropshot = { version = "0.16.3", features = [ "usdt-probes" ] } dyn-clone = "1.0.20" either = "1.15.0" ereport-types = { path = "ereport/types" } @@ -528,8 +528,8 @@ newtype_derive = "0.1.6" ntp-admin-api = { path = "ntp-admin/api" } ntp-admin-client = { path = "clients/ntp-admin-client" } ntp-admin-types = { path = "ntp-admin/types" } -mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "fa5f15cdcd5864161a929e2ec01534f70dfba216" } -ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "fa5f15cdcd5864161a929e2ec01534f70dfba216" } +mg-admin-client = { git = 
"https://github.com/oxidecomputer/maghemite", rev = "e5f53eacc5ab1015c4cd7298912657521a6b8351" } +ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "e5f53eacc5ab1015c4cd7298912657521a6b8351" } multimap = "0.10.1" nexus-auth = { path = "nexus/auth" } nexus-background-task-interface = { path = "nexus/background-task-interface" } diff --git a/dev-tools/ls-apis/tests/api_dependencies.out b/dev-tools/ls-apis/tests/api_dependencies.out index 95da5723065..0eed460686c 100644 --- a/dev-tools/ls-apis/tests/api_dependencies.out +++ b/dev-tools/ls-apis/tests/api_dependencies.out @@ -12,8 +12,10 @@ Clickhouse Single-Node Cluster Admin (client: clickhouse-admin-single-client) consumed by: omicron-nexus (omicron/nexus) via 2 paths CockroachDB Cluster Admin (client: cockroach-admin-client) + consumed by: dpd (dendrite/dpd) via 1 path consumed by: omicron-nexus (omicron/nexus) via 3 paths consumed by: omicron-sled-agent (omicron/sled-agent) via 1 path + consumed by: tfportd (dendrite/tfportd) via 1 path Crucible Agent (client: crucible-agent-client) consumed by: omicron-nexus (omicron/nexus) via 1 path @@ -39,7 +41,7 @@ Dendrite DPD (client: dpd-client) consumed by: mgd (maghemite/mgd) via 1 path consumed by: omicron-nexus (omicron/nexus) via 2 paths consumed by: omicron-sled-agent (omicron/sled-agent) via 1 path - consumed by: tfportd (dendrite/tfportd) via 1 path + consumed by: tfportd (dendrite/tfportd) via 2 paths consumed by: wicketd (omicron/wicketd) via 2 paths Downstairs Controller (debugging only) (client: dsc-client) diff --git a/dev-tools/xtask/src/virtual_hardware.rs b/dev-tools/xtask/src/virtual_hardware.rs index ebb02271eb8..aca8b979db5 100644 --- a/dev-tools/xtask/src/virtual_hardware.rs +++ b/dev-tools/xtask/src/virtual_hardware.rs @@ -117,7 +117,7 @@ const ZPOOL: &'static str = "/usr/sbin/zpool"; const ZONEADM: &'static str = "/usr/sbin/zoneadm"; const SIDECAR_LITE_COMMIT: &'static str = - "9499681efa8cdbffe807e6b40bf9211d0d2c21aa"; + 
"a95b7a9f78c08125f4e34106f5c885c7e9f2e8d5"; const SOFTNPU_COMMIT: &'static str = "3203c51cf4473d30991b522062ac0df2e045c2f2"; const PXA_MAC_DEFAULT: &'static str = "a8:e1:de:01:70:1d"; diff --git a/package-manifest.toml b/package-manifest.toml index b503a8fd4ab..bbc3ffd62f0 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -653,10 +653,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "fa5f15cdcd5864161a929e2ec01534f70dfba216" +source.commit = "e5f53eacc5ab1015c4cd7298912657521a6b8351" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm-gz.sha256.txt -source.sha256 = "9700900c62394b0858dbd4c12ac23039bed24cae8782e5153f8dfe707589c182" +source.sha256 = "0743e486c7c4183794c0b4f321f6ce7169fc95753f0c9240e407fd64f949ab38" output.type = "tarball" [package.mg-ddm] @@ -669,10 +669,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "fa5f15cdcd5864161a929e2ec01534f70dfba216" +source.commit = "e5f53eacc5ab1015c4cd7298912657521a6b8351" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "2a2b15b22b0c7604c4e5692af24515511084f2dbb17e27af4328bb4e0a8a441e" +source.sha256 = "a1af0c959e532b78137c4466b9242c2583f753b942d92708783fafe5f937643b" output.type = "zone" output.intermediate_only = true @@ -684,10 +684,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). 
-source.commit = "fa5f15cdcd5864161a929e2ec01534f70dfba216" +source.commit = "e5f53eacc5ab1015c4cd7298912657521a6b8351" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mgd.sha256.txt -source.sha256 = "b18be967a805bf4c0bf872d152ae2f58972c4f3c173a7c0f33c2475a011f1dd1" +source.sha256 = "2281f14eda20b97a1933aaf7bba8cc07e6b65da39d6dc62ce210f9601fcc3396" output.type = "zone" output.intermediate_only = true @@ -695,8 +695,8 @@ output.intermediate_only = true service_name = "lldp" source.type = "prebuilt" source.repo = "lldp" -source.commit = "82fbc8c9747eb9f74dde0f92ae77ec67f65652c4" -source.sha256 = "59050782fa238bcf3c1234ac8262300db9eb98823c76a2188bff2994f3779857" +source.commit = "53f4254b1ce7f13e23fdb54180015760b5f44d55" +source.sha256 = "8ef356ec9ca4d261c35d6051d087d17e8757778114eb314d7a011e0927b006cd" output.type = "zone" output.intermediate_only = true @@ -735,8 +735,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "8314881e372d7bbb4a4ee2da051ecdc34f66c534" -source.sha256 = "33c702ec73d48854e75eeda60e75be57c628dc6b3cc70ae341af0b4b9a99e1b8" +source.commit = "738c80d18d5e94eda367440ade7743e9d9f124de" +source.sha256 = "cc78c4fa4f863df62eda1f90175f3a7ffe1b34b7bb6a95bed869c2df5e6c4a08" output.type = "zone" output.intermediate_only = true @@ -762,8 +762,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "8314881e372d7bbb4a4ee2da051ecdc34f66c534" -source.sha256 = "05543cffe604f8caccf8f3e223b2ee404620f3adc3dd4812e30fd2f5a92f6010" +source.commit = "738c80d18d5e94eda367440ade7743e9d9f124de" +source.sha256 = "55376e97f2b5695475275f78b8b3d2c8bad1100df13a75746fe82ad43e786082" output.type = "zone" output.intermediate_only = true @@ -782,8 +782,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. 
source.type = "prebuilt" source.repo = "dendrite" -source.commit = "8314881e372d7bbb4a4ee2da051ecdc34f66c534" -source.sha256 = "e57e5cfaadb451c786d94665ae91459d46412541fedcb3d3903766191fdfd2f0" +source.commit = "738c80d18d5e94eda367440ade7743e9d9f124de" +source.sha256 = "f2d3f38100fd49fff3884512ecfeb92c4a1d079de2c862b869c8aa83c75ba640" output.type = "zone" output.intermediate_only = true diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index c6735294200..df90dcf5760 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="33c702ec73d48854e75eeda60e75be57c628dc6b3cc70ae341af0b4b9a99e1b8" -CIDL_SHA256_LINUX_DPD="670809a9cd9112a4603c71df6ecd894fa777b3ed9cf8055582aab8e5b33ac157" -CIDL_SHA256_LINUX_SWADM="3cc3fd488b7134b3e36507000d79d04d7524e0e0b0af3e0bf950fd5b4f605713" +CIDL_SHA256_ILLUMOS="cc78c4fa4f863df62eda1f90175f3a7ffe1b34b7bb6a95bed869c2df5e6c4a08" +CIDL_SHA256_LINUX_DPD="c806645b8bfa2b605c4cb48c33a7470ba91c82696df59738518087f92f4bb2e0" +CIDL_SHA256_LINUX_SWADM="d59294cd4094c10c50341bf94deebccf91376a7e377c5a3b0113344b8841510a" diff --git a/tools/dendrite_version b/tools/dendrite_version index e97715b9697..407a104707b 100644 --- a/tools/dendrite_version +++ b/tools/dendrite_version @@ -1 +1 @@ -COMMIT="8314881e372d7bbb4a4ee2da051ecdc34f66c534" +COMMIT="738c80d18d5e94eda367440ade7743e9d9f124de" diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index 7d6c2fac6a9..e77e9e13128 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="fa5f15cdcd5864161a929e2ec01534f70dfba216" +COMMIT="e5f53eacc5ab1015c4cd7298912657521a6b8351" SHA2="9146aaf60a52ecd138139708e4019e4496f330fb81a2c5a7a70cd3436a6a1318" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index e2baa1d7ba3..31a7a21fd5b 100644 --- a/tools/maghemite_mg_openapi_version +++ 
b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="fa5f15cdcd5864161a929e2ec01534f70dfba216" -SHA2="7af1675e2e93e395185f8d3676db972db0123714c4c5640608f3e3570f3ce3a8" +COMMIT="e5f53eacc5ab1015c4cd7298912657521a6b8351" +SHA2="3d68b221bd0a72cf87195a8a4c8ae408daadf50763349606b9df8186b2d6fe9a" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index bd572330f26..67987a519ed 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="b18be967a805bf4c0bf872d152ae2f58972c4f3c173a7c0f33c2475a011f1dd1" -MGD_LINUX_SHA256="898bda7698ce594962b61e7c1b637f0f5ad843c1ab60eb5846fe1afdb84be8df" \ No newline at end of file +CIDL_SHA256="2281f14eda20b97a1933aaf7bba8cc07e6b65da39d6dc62ce210f9601fcc3396" +MGD_LINUX_SHA256="a7f483807db3b1f98ceece73e126f09ae8c3b47f4fab1a303b74819caea64c73" \ No newline at end of file diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index b06446c5f61..d0d8f2e63e3 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -64,6 +64,7 @@ generic-array = { version = "0.14.7", default-features = false, features = ["mor getrandom-6f8ce4dd05d13bba = { package = "getrandom", version = "0.2.15", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } hashbrown = { version = "0.15.4" } +heck = { version = "0.4.1" } hickory-proto = { version = "0.25.2", features = ["serde", "text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "1.7.0", features = ["full"] } @@ -98,7 +99,7 @@ portable-atomic = { version = "1.11.0" } postgres-types = { version = "0.2.9", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.20", default-features = false, features = ["simd", "std"] } predicates = { version = "3.1.3" } -proc-macro2 = { version = "1.0.95" } 
+proc-macro2 = { version = "1.0.101" } rand-274715c4dabd11b0 = { package = "rand", version = "0.9.2" } rand-c38e5c1d305a1b54 = { package = "rand", version = "0.8.5" } rand_chacha-274715c4dabd11b0 = { package = "rand_chacha", version = "0.9.0", default-features = false, features = ["std"] } @@ -200,6 +201,7 @@ generic-array = { version = "0.14.7", default-features = false, features = ["mor getrandom-6f8ce4dd05d13bba = { package = "getrandom", version = "0.2.15", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } hashbrown = { version = "0.15.4" } +heck = { version = "0.4.1" } hickory-proto = { version = "0.25.2", features = ["serde", "text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "1.7.0", features = ["full"] } @@ -234,7 +236,7 @@ portable-atomic = { version = "1.11.0" } postgres-types = { version = "0.2.9", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.20", default-features = false, features = ["simd", "std"] } predicates = { version = "3.1.3" } -proc-macro2 = { version = "1.0.95" } +proc-macro2 = { version = "1.0.101" } rand-274715c4dabd11b0 = { package = "rand", version = "0.9.2" } rand-c38e5c1d305a1b54 = { package = "rand", version = "0.8.5" } rand_chacha-274715c4dabd11b0 = { package = "rand_chacha", version = "0.9.0", default-features = false, features = ["std"] } From d13dfcf693eb4e2c27f9b8b52b4fd6f42f93bed9 Mon Sep 17 00:00:00 2001 From: Kai Sforza Date: Fri, 5 Sep 2025 21:07:57 -0500 Subject: [PATCH 35/50] nix: fix flake versions (#8969) Rust 1.89 wasn't available on that nixpkgs revision, but it was specified in the `rust-toolchain.toml` file. 
--- flake.lock | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/flake.lock b/flake.lock index 581e225ded3..d59b53d2afe 100644 --- a/flake.lock +++ b/flake.lock @@ -2,11 +2,11 @@ "nodes": { "nixpkgs": { "locked": { - "lastModified": 1749285348, - "narHash": "sha256-frdhQvPbmDYaScPFiCnfdh3B/Vh81Uuoo0w5TkWmmjU=", + "lastModified": 1756542300, + "narHash": "sha256-tlOn88coG5fzdyqz6R93SQL5Gpq+m/DsWpekNFhqPQk=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "3e3afe5174c561dee0df6f2c2b2236990146329f", + "rev": "d7600c775f877cd87b4f5a831c28aa94137377aa", "type": "github" }, "original": { @@ -29,11 +29,11 @@ ] }, "locked": { - "lastModified": 1749436897, - "narHash": "sha256-OkDtaCGQQVwVFz5HWfbmrMJR99sFIMXHCHEYXzUJEJY=", + "lastModified": 1756694554, + "narHash": "sha256-z/Iy4qvcMqzhA2IAAg71Sw4BrMwbBHvCS90ZoPLsnIk=", "owner": "oxalica", "repo": "rust-overlay", - "rev": "e7876c387e35dc834838aff254d8e74cf5bd4f19", + "rev": "b29e5365120f344fe7161f14fc9e272fcc41ee56", "type": "github" }, "original": { From 67beca5452ad7e692e232efb1d3eeaa1cd54f305 Mon Sep 17 00:00:00 2001 From: Justin Date: Fri, 5 Sep 2025 22:29:26 -0400 Subject: [PATCH 36/50] Explain how to authenticate to the now running control plane. (#8977) --- dev-tools/omicron-dev/src/main.rs | 1 + docs/how-to-run-simulated.adoc | 11 +++++++++++ nexus/test-utils/src/lib.rs | 5 +++++ 3 files changed, 17 insertions(+) diff --git a/dev-tools/omicron-dev/src/main.rs b/dev-tools/omicron-dev/src/main.rs index fb62f1dd49f..0dcf2c229c5 100644 --- a/dev-tools/omicron-dev/src/main.rs +++ b/dev-tools/omicron-dev/src/main.rs @@ -159,6 +159,7 @@ impl RunAllArgs { "omicron-dev: privileged user name: {}", cptestctx.user_name.as_ref(), ); + println!("omicron-dev: privileged password: {}", cptestctx.password); // Wait for a signal. 
let caught_signal = signal_stream.next().await; diff --git a/docs/how-to-run-simulated.adoc b/docs/how-to-run-simulated.adoc index a3a72591bcd..4ead4c328ee 100644 --- a/docs/how-to-run-simulated.adoc +++ b/docs/how-to-run-simulated.adoc @@ -307,6 +307,17 @@ Once everything is up and running, you can use the system in a few ways: * Use the browser-based console. The Nexus log output will show what IP address and port it's listening on. This is also configured in the config file. If you're using the defaults with `omicron-dev run-all`, you can reach the console at `http://127.0.0.1:12220/projects`. If you ran a second Nexus using the `config-second.toml` config file, it will be on port `12222` instead (because that config file specifies port 12222). Depending on the environment where you're running this, you may need an ssh tunnel or the like to reach this from your browser. * Use the xref:cli.adoc[`oxide` CLI]. +You can authenticate using a username of `test-privileged` and a password of `oxide`. + +If the console endpoints are returning a 404 run `cargo xtask download console`. + +Initiate a cli login using + +[source,text] +---- +$ oxide auth login --host http://localhost:12220 +---- + == Running with TLS When you run the above, you will wind up with Nexus listening on HTTP (with no TLS) on its external address. This is convenient for debugging, but not representative of a real system. 
If you want to run it with TLS, you need to tweak the above procedure slightly: diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 53fa0b6de9f..9d231e1837b 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -193,6 +193,7 @@ pub struct ControlPlaneTestContext { pub initial_blueprint_id: BlueprintUuid, pub silo_name: Name, pub user_name: UserId, + pub password: String, } impl ControlPlaneTestContext { @@ -454,6 +455,7 @@ pub struct ControlPlaneTestContextBuilder<'a, N: NexusServer> { pub silo_name: Option, pub user_name: Option, + pub password: Option, pub simulated_upstairs: Arc, } @@ -503,6 +505,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { blueprint_sleds: None, silo_name: None, user_name: None, + password: None, simulated_upstairs: Arc::new(sim::SimulatedUpstairs::new( simulated_upstairs_log, )), @@ -1103,6 +1106,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { self.internal_client = Some(testctx_internal); self.silo_name = Some(silo_name); self.user_name = Some(user_name); + self.password = Some(TEST_SUITE_PASSWORD.to_string()); self.server = Some(server); } @@ -1476,6 +1480,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { initial_blueprint_id: self.initial_blueprint_id.unwrap(), silo_name: self.silo_name.unwrap(), user_name: self.user_name.unwrap(), + password: self.password.unwrap(), } } From 6fd635c69a299511c9b8bec86237d598ffd38942 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Fri, 5 Sep 2025 21:31:31 -0500 Subject: [PATCH 37/50] Bump web console (fix IP pools) (#9007) https://github.com/oxidecomputer/console/compare/8cc5b253...e0efddfe * [e0efddfe](https://github.com/oxidecomputer/console/commit/e0efddfe) oxidecomputer/console#2897 * [33c4c433](https://github.com/oxidecomputer/console/commit/33c4c433) oxidecomputer/console#2895 * [2e4109a1](https://github.com/oxidecomputer/console/commit/2e4109a1) oxidecomputer/console#2893 --- 
tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index 3cf8ac57bf5..8ccff996dc4 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="8cc5b253995ec4e5b31635c86804345c48cda944" -SHA2="50731960b779fe5ecdea8b9c947d09f25b7a17f458c82f192f7c6d27a6699d20" +COMMIT="e0efddfe4a6d26fa6d49828e6e1d493dd61cb3bb" +SHA2="25450341f4015e3845587d0fd187c9493b5e3573f1a1233a12361833964e50fb" From 2151b306b688d353e303cee4fd9f0863accba87f Mon Sep 17 00:00:00 2001 From: Ryan Goodfellow Date: Sat, 6 Sep 2025 08:03:55 -0700 Subject: [PATCH 38/50] bump maghemite to pick up #539 (#9009) --- Cargo.lock | 4 ++-- Cargo.toml | 4 ++-- package-manifest.toml | 12 ++++++------ tools/maghemite_ddm_openapi_version | 2 +- tools/maghemite_mg_openapi_version | 2 +- tools/maghemite_mgd_checksums | 4 ++-- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9eee8a2e185..5811f7fe56c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2386,7 +2386,7 @@ dependencies = [ [[package]] name = "ddm-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=e5f53eacc5ab1015c4cd7298912657521a6b8351#e5f53eacc5ab1015c4cd7298912657521a6b8351" +source = "git+https://github.com/oxidecomputer/maghemite?rev=e3c587a47039a6c7aff6cc53b8e72e5328d57fc0#e3c587a47039a6c7aff6cc53b8e72e5328d57fc0" dependencies = [ "oxnet", "percent-encoding", @@ -6062,7 +6062,7 @@ dependencies = [ [[package]] name = "mg-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=e5f53eacc5ab1015c4cd7298912657521a6b8351#e5f53eacc5ab1015c4cd7298912657521a6b8351" +source = "git+https://github.com/oxidecomputer/maghemite?rev=e3c587a47039a6c7aff6cc53b8e72e5328d57fc0#e3c587a47039a6c7aff6cc53b8e72e5328d57fc0" dependencies = [ "anyhow", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 36527a0183e..bddf6750fda 100644 --- a/Cargo.toml 
+++ b/Cargo.toml @@ -528,8 +528,8 @@ newtype_derive = "0.1.6" ntp-admin-api = { path = "ntp-admin/api" } ntp-admin-client = { path = "clients/ntp-admin-client" } ntp-admin-types = { path = "ntp-admin/types" } -mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "e5f53eacc5ab1015c4cd7298912657521a6b8351" } -ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "e5f53eacc5ab1015c4cd7298912657521a6b8351" } +mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "e3c587a47039a6c7aff6cc53b8e72e5328d57fc0" } +ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "e3c587a47039a6c7aff6cc53b8e72e5328d57fc0" } multimap = "0.10.1" nexus-auth = { path = "nexus/auth" } nexus-background-task-interface = { path = "nexus/background-task-interface" } diff --git a/package-manifest.toml b/package-manifest.toml index bbc3ffd62f0..911f70ae33c 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -653,10 +653,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "e5f53eacc5ab1015c4cd7298912657521a6b8351" +source.commit = "e3c587a47039a6c7aff6cc53b8e72e5328d57fc0" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm-gz.sha256.txt -source.sha256 = "0743e486c7c4183794c0b4f321f6ce7169fc95753f0c9240e407fd64f949ab38" +source.sha256 = "c27b0a2fbfd19c76f2a7739956d61403d5663d289f2e7b153a2175ca85726b1a" output.type = "tarball" [package.mg-ddm] @@ -669,10 +669,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). 
-source.commit = "e5f53eacc5ab1015c4cd7298912657521a6b8351" +source.commit = "e3c587a47039a6c7aff6cc53b8e72e5328d57fc0" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "a1af0c959e532b78137c4466b9242c2583f753b942d92708783fafe5f937643b" +source.sha256 = "152eb8597e56a5d121123e096ca878fe4c8a50da81949b96a7498ed7acbec24e" output.type = "zone" output.intermediate_only = true @@ -684,10 +684,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "e5f53eacc5ab1015c4cd7298912657521a6b8351" +source.commit = "e3c587a47039a6c7aff6cc53b8e72e5328d57fc0" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mgd.sha256.txt -source.sha256 = "2281f14eda20b97a1933aaf7bba8cc07e6b65da39d6dc62ce210f9601fcc3396" +source.sha256 = "d069f4a09ade29b34559df03c02df45e8ef8259b8fc7c0b0475c7f64d046d631" output.type = "zone" output.intermediate_only = true diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index e77e9e13128..0cd44742f11 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="e5f53eacc5ab1015c4cd7298912657521a6b8351" +COMMIT="e3c587a47039a6c7aff6cc53b8e72e5328d57fc0" SHA2="9146aaf60a52ecd138139708e4019e4496f330fb81a2c5a7a70cd3436a6a1318" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index 31a7a21fd5b..5ee954194a3 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="e5f53eacc5ab1015c4cd7298912657521a6b8351" +COMMIT="e3c587a47039a6c7aff6cc53b8e72e5328d57fc0" SHA2="3d68b221bd0a72cf87195a8a4c8ae408daadf50763349606b9df8186b2d6fe9a" 
diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index 67987a519ed..7f29ae4b6a0 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="2281f14eda20b97a1933aaf7bba8cc07e6b65da39d6dc62ce210f9601fcc3396" -MGD_LINUX_SHA256="a7f483807db3b1f98ceece73e126f09ae8c3b47f4fab1a303b74819caea64c73" \ No newline at end of file +CIDL_SHA256="d069f4a09ade29b34559df03c02df45e8ef8259b8fc7c0b0475c7f64d046d631" +MGD_LINUX_SHA256="41f180e21bda4b0a7474d928b8cf450cb82f2fe621eb096866f195fa45f862a6" \ No newline at end of file From 044713a6afe8b4848cb6a917d3b0b7a83e072bf0 Mon Sep 17 00:00:00 2001 From: alan Date: Sun, 7 Sep 2025 02:10:49 +0000 Subject: [PATCH 39/50] remove debug messages --- nexus/src/app/quiesce.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/nexus/src/app/quiesce.rs b/nexus/src/app/quiesce.rs index 762345a9384..674366d8ecc 100644 --- a/nexus/src/app/quiesce.rs +++ b/nexus/src/app/quiesce.rs @@ -282,12 +282,6 @@ mod test { assert!(duration_total >= duration_draining_sagas); assert!(duration_total >= duration_draining_db); assert!(duration_total >= duration_recording_quiesce); - // Add additional debug information trying to find a test flake - eprintln!( - "dt: {:?} <= {after} - {before} which evaluates to: {:?}", - duration_total, - (after - before).to_std().unwrap() - ); assert!(duration_total <= (after - before).to_std().unwrap()); assert!(status.sagas.sagas_pending.is_empty()); assert!(status.db_claims.is_empty()); From 3c4736cbb18e149dc6a0a7a3aea4644bcf9b8a53 Mon Sep 17 00:00:00 2001 From: Nils Nieuwejaar Date: Fri, 5 Sep 2025 20:59:32 -0400 Subject: [PATCH 40/50] Update dendrite, lldpd, maghemite, and sidecar-lite to get ipv6 ecmp (#8987) --- tools/maghemite_mgd_checksums | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index 7f29ae4b6a0..88a98dd95f3 100644 --- a/tools/maghemite_mgd_checksums +++ 
b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ CIDL_SHA256="d069f4a09ade29b34559df03c02df45e8ef8259b8fc7c0b0475c7f64d046d631" -MGD_LINUX_SHA256="41f180e21bda4b0a7474d928b8cf450cb82f2fe621eb096866f195fa45f862a6" \ No newline at end of file +MGD_LINUX_SHA256="41f180e21bda4b0a7474d928b8cf450cb82f2fe621eb096866f195fa45f862a6" From fca7c7b7a64328272ca295a2e3a7f45498f2f0b2 Mon Sep 17 00:00:00 2001 From: alan Date: Sun, 7 Sep 2025 15:52:16 +0000 Subject: [PATCH 41/50] Remove more debug logs --- nexus/tests/integration_tests/crucible_replacements.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index bf9eb123971..33dacbcc711 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -403,10 +403,6 @@ mod region_replacement { // Assert the request is in state Complete - eprintln!( - "Waited for all replacements, including {:?}", - self.replacement_request_id - ); let region_replacement = self .datastore .get_region_replacement_request_by_id( From 732cbb175fdcad537e24a4777e95faf5e1c8c582 Mon Sep 17 00:00:00 2001 From: alan Date: Sun, 7 Sep 2025 18:16:17 +0000 Subject: [PATCH 42/50] Remove another debug print, run tests again --- nexus/tests/integration_tests/crucible_replacements.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index 33dacbcc711..02ea82ad64f 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -1071,7 +1071,6 @@ async fn test_racing_replacements_for_soft_deleted_disk_volume( activate_background_task(&internal_client, "region_replacement_driver") .await; - eprintln!("last_background_task {:?}", last_background_task); let res = match last_background_task.last { 
LastResult::Completed(last_result_completed) => { match serde_json::from_value::( From e06766ace7d3d62c383dff5788bb081ca9a7f9fb Mon Sep 17 00:00:00 2001 From: alan Date: Sun, 7 Sep 2025 21:38:24 +0000 Subject: [PATCH 43/50] Remove yet another debug print, run tests again --- .../tests/integration_tests/crucible_replacements.rs | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index 02ea82ad64f..08fc433ae1f 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -1659,16 +1659,6 @@ mod region_snapshot_replacement { .await .unwrap(); - /* - let pre_result = self - .datastore - .lookup_region_snapshot_replacement_request( - &self.opctx(), - RegionSnapshot,, - ) - .await - */ - let result = self .datastore .create_region_snapshot_replacement_step( @@ -1679,7 +1669,6 @@ mod region_snapshot_replacement { .await .unwrap(); - eprintln!("result: {:?}", result); // ZZZ: is "AlreadyHandled" an error here? // Could that be a valid result if some other actor put the // replacement step into place? From 787181074a306b4c353963050b4e55b0dfc4e624 Mon Sep 17 00:00:00 2001 From: alan Date: Mon, 8 Sep 2025 00:45:15 +0000 Subject: [PATCH 44/50] Remove another comment, run tests again --- nexus/tests/integration_tests/crucible_replacements.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index 08fc433ae1f..314b5045996 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -1669,15 +1669,6 @@ mod region_snapshot_replacement { .await .unwrap(); - // ZZZ: is "AlreadyHandled" an error here? - // Could that be a valid result if some other actor put the - // replacement step into place? 
- // We get back: - // bad result: AlreadyHandled { existing_step_id: 83e38140-f238-4fed-8cef-58121d507a49 - // } - // Can we dump an existing ID and get more info from it? Yes, but it's also - // possible it's also done, and the request ID is not found. - // We unwrap with: internal_message: "unexpected database error: Record not found" match result { InsertStepResult::Inserted { .. } => {} From 687fe86ef24239565ccb9c28f34f746ea04ceee4 Mon Sep 17 00:00:00 2001 From: alan Date: Mon, 8 Sep 2025 03:54:07 +0000 Subject: [PATCH 45/50] Update comment, run tests again --- sled-agent/src/sim/http_entrypoints_pantry.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sled-agent/src/sim/http_entrypoints_pantry.rs b/sled-agent/src/sim/http_entrypoints_pantry.rs index c10795a923a..0955aaa4dff 100644 --- a/sled-agent/src/sim/http_entrypoints_pantry.rs +++ b/sled-agent/src/sim/http_entrypoints_pantry.rs @@ -400,7 +400,7 @@ mod tests { "https://raw.githubusercontent.com/oxidecomputer/crucible/{part}/openapi/crucible-pantry.json", ); - // The default timeout of 30 seconds was sometimes failing under + // The default timeout of 30 seconds was sometimes not enough // heavy load. 
let raw_json = reqwest::blocking::Client::builder() .timeout(std::time::Duration::from_secs(120)) From f4b32710463a88d0396c65802ccef16c745795bc Mon Sep 17 00:00:00 2001 From: alan Date: Mon, 8 Sep 2025 15:48:58 +0000 Subject: [PATCH 46/50] fix mismerge for mgd_checksums --- tools/maghemite_mgd_checksums | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index 88a98dd95f3..7f29ae4b6a0 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ CIDL_SHA256="d069f4a09ade29b34559df03c02df45e8ef8259b8fc7c0b0475c7f64d046d631" -MGD_LINUX_SHA256="41f180e21bda4b0a7474d928b8cf450cb82f2fe621eb096866f195fa45f862a6" +MGD_LINUX_SHA256="41f180e21bda4b0a7474d928b8cf450cb82f2fe621eb096866f195fa45f862a6" \ No newline at end of file From 725da3667246396f43bd1688036f8c583b713f92 Mon Sep 17 00:00:00 2001 From: alan Date: Mon, 8 Sep 2025 17:42:13 +0000 Subject: [PATCH 47/50] remove debug function, run tests again --- .../crucible_replacements.rs | 42 ++----------------- 1 file changed, 3 insertions(+), 39 deletions(-) diff --git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index 314b5045996..12bb1f8f62a 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -1692,7 +1692,7 @@ mod region_snapshot_replacement { pub async fn assert_read_only_target_gone(&self) { eprintln!( - "NOW1 starting, replace_request_id: {:?}", + "Starting, replace_request_id: {:?}", self.replacement_request_id ); let mut i = 1; @@ -1706,7 +1706,7 @@ mod region_snapshot_replacement { .await .unwrap(); eprintln!( - "NOW2 rs_replace_request: {:?}", + "In loop {i} with rs_replace_request: {:?}", region_snapshot_replace_request ); @@ -1716,7 +1716,7 @@ mod region_snapshot_replacement { .await .unwrap(); - eprintln!("NOW3 target that should be gone: {:?}", res); + 
eprintln!("In loop {i} target that should be gone: {:?}", res); if res.is_none() { // test pass, move on break; @@ -1726,37 +1726,6 @@ mod region_snapshot_replacement { i += 1; } } - pub async fn pre_assert_read_only_target_gone(&self) { - eprintln!( - "PRE1 replace_request_id: {:?}", - self.replacement_request_id - ); - let region_snapshot_replace_request = self - .datastore - .get_region_snapshot_replacement_request_by_id( - &self.opctx(), - self.replacement_request_id, - ) - .await - .unwrap(); - - eprintln!( - "PRE2 rs_replace_request: {:?}", - region_snapshot_replace_request - ); - match self - .datastore - .read_only_target_addr(®ion_snapshot_replace_request) - .await - { - Ok(res) => { - eprintln!("PRE3 target that will be gone: {:?}", res); - } - Err(e) => { - eprintln!("PRE3 target will be gone is error: {:?}", e); - } - } - } pub async fn remove_disk_from_snapshot_rop(&self) { let disk_url = get_disk_url("disk-from-snapshot"); @@ -2033,14 +2002,9 @@ async fn test_region_snapshot_replacement_step_after_rop_remove_target_gone( test_harness.transition_request_to_replacement_done().await; test_harness.transition_request_to_running().await; - eprintln!("ROP ONE"); - test_harness.pre_assert_read_only_target_gone().await; test_harness.create_manual_region_snapshot_replacement_step().await; - test_harness.pre_assert_read_only_target_gone().await; test_harness.delete_the_disk().await; - test_harness.pre_assert_read_only_target_gone().await; test_harness.delete_the_snapshot().await; - test_harness.pre_assert_read_only_target_gone().await; // Remove the ROP of the disk created from the snapshot test_harness.remove_disk_from_snapshot_rop().await; From 9c49f877e5c2785f0c7b60c853c24b10d0491dd6 Mon Sep 17 00:00:00 2001 From: alan Date: Mon, 15 Sep 2025 22:55:52 +0000 Subject: [PATCH 48/50] PR comments --- nexus/tests/integration_tests/crucible_replacements.rs | 4 +--- nexus/tests/integration_tests/instances.rs | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff 
--git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index 12bb1f8f62a..2d51a2d8a5c 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -481,7 +481,7 @@ mod region_replacement { } }, &std::time::Duration::from_millis(50), - &std::time::Duration::from_secs(260), // 60 was not enough + &std::time::Duration::from_secs(260), ) .await .expect("request transitioned to expected state"); @@ -1730,7 +1730,6 @@ mod region_snapshot_replacement { pub async fn remove_disk_from_snapshot_rop(&self) { let disk_url = get_disk_url("disk-from-snapshot"); - eprintln!("NOW Remove disk from snapshot for disk {:?}", disk_url); let disk_from_snapshot: external::Disk = NexusRequest::object_get(&self.client, &disk_url) .authn_as(AuthnMode::PrivilegedUser) @@ -1742,7 +1741,6 @@ mod region_snapshot_replacement { let disk_id = disk_from_snapshot.identity.id; - eprintln!("NOW Remove disk id {:?}", disk_id); // Note: `make_request` needs a type here, otherwise rustc cannot // figure out the type of the `request_body` parameter self.internal_client diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index a6f21982609..746c162f359 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -6891,7 +6891,7 @@ pub async fn instance_wait_for_state_as( instance_id: InstanceUuid, state: omicron_common::api::external::InstanceState, ) -> Instance { - const MAX_WAIT: Duration = Duration::from_secs(320); // 120 was not enough + const MAX_WAIT: Duration = Duration::from_secs(320); slog::info!( &client.client_log, From eb68c0bc541bbdce7583c92954e32976519a3a5c Mon Sep 17 00:00:00 2001 From: alan Date: Fri, 19 Sep 2025 19:00:17 +0000 Subject: [PATCH 49/50] Make some tests fail, no loop --- .../region_snapshot_replacement_start.rs | 38 ++++++------------- 1 file changed, 
12 insertions(+), 26 deletions(-) diff --git a/nexus/src/app/sagas/region_snapshot_replacement_start.rs b/nexus/src/app/sagas/region_snapshot_replacement_start.rs index dd80c80cea1..a0c64884c15 100644 --- a/nexus/src/app/sagas/region_snapshot_replacement_start.rs +++ b/nexus/src/app/sagas/region_snapshot_replacement_start.rs @@ -1535,35 +1535,21 @@ pub(crate) mod test { ) { let opctx = test_opctx(cptestctx); - let mut i = 1; - loop { - let db_request = datastore - .get_region_snapshot_replacement_request_by_id( - &opctx, request.id, - ) - .await - .unwrap(); + let db_request = datastore + .get_region_snapshot_replacement_request_by_id( + &opctx, request.id, + ) + .await + .unwrap(); - assert_eq!(db_request.new_region_id, None); - assert_eq!(db_request.operating_saga_id, None); + assert_eq!(db_request.new_region_id, None); + assert_eq!(db_request.operating_saga_id, None); - if !matches!( - db_request.replacement_state, - RegionSnapshotReplacementState::Requested - ) { - eprintln!( - "loop {i} Failed {:?} != Requested", - db_request.replacement_state - ); - // 200 * 5 = 1000 seconds, at this point something is wrong. 
- if i > 200 { - panic!("Failed to reach requested state after {i} tries"); - } - tokio::time::sleep(std::time::Duration::from_secs(5)).await; - } else { - break; + match db_request.replacement_state { + RegionSnapshotReplacementState::Requested => {} + x => { + panic!("replacement state {:?} != Requested", x); } - i += 1; } } From 045726167e571595224a42d1f01dbef7ceb5cd94 Mon Sep 17 00:00:00 2001 From: alan Date: Fri, 19 Sep 2025 19:01:41 +0000 Subject: [PATCH 50/50] cargo fmt --- nexus/src/app/sagas/region_snapshot_replacement_start.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/nexus/src/app/sagas/region_snapshot_replacement_start.rs b/nexus/src/app/sagas/region_snapshot_replacement_start.rs index a0c64884c15..d3cf05af796 100644 --- a/nexus/src/app/sagas/region_snapshot_replacement_start.rs +++ b/nexus/src/app/sagas/region_snapshot_replacement_start.rs @@ -1536,9 +1536,7 @@ pub(crate) mod test { let opctx = test_opctx(cptestctx); let db_request = datastore - .get_region_snapshot_replacement_request_by_id( - &opctx, request.id, - ) + .get_region_snapshot_replacement_request_by_id(&opctx, request.id) .await .unwrap();