From 8e99868b6cb13a4215a1d546afdeefdfe058e2e8 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 30 Nov 2022 11:20:08 -0500 Subject: [PATCH 1/2] Make backoff policy explicit --- common/src/backoff.rs | 31 +++++++++++++------- nexus/src/app/oximeter.rs | 2 +- nexus/src/app/sagas/common_storage.rs | 2 +- nexus/src/db/saga_recovery.rs | 4 +-- nexus/src/populate.rs | 2 +- nexus/tests/integration_tests/disks.rs | 2 +- oximeter/collector/src/lib.rs | 4 +-- sled-agent/src/bootstrap/agent.rs | 4 +-- sled-agent/src/bootstrap/ddm_admin_client.rs | 4 +-- sled-agent/src/bootstrap/rss_handle.rs | 10 +++++-- sled-agent/src/illumos/svc.rs | 2 +- sled-agent/src/instance.rs | 4 +-- sled-agent/src/rack_setup/service.rs | 19 ++++++------ sled-agent/src/sim/server.rs | 4 +-- sled-agent/src/sled_agent.rs | 6 ++-- sled-agent/src/storage_manager.rs | 6 ++-- 16 files changed, 60 insertions(+), 46 deletions(-) diff --git a/common/src/backoff.rs b/common/src/backoff.rs index 46f05d899a6..a1776e29276 100644 --- a/common/src/backoff.rs +++ b/common/src/backoff.rs @@ -10,22 +10,33 @@ pub use ::backoff::future::{retry, retry_notify}; pub use ::backoff::Error as BackoffError; pub use ::backoff::{backoff::Backoff, ExponentialBackoff, Notify}; -/// Return a backoff policy appropriate for retrying internal services -/// indefinitely. -pub fn internal_service_policy() -> ::backoff::ExponentialBackoff { +/// Return a backoff policy for querying internal services which may not be up +/// for a relatively long amount of time. +pub fn internal_service_policy_long() -> ::backoff::ExponentialBackoff { + const INITIAL_INTERVAL: Duration = Duration::from_millis(250); const MAX_INTERVAL: Duration = Duration::from_secs(60 * 60); - internal_service_policy_with_max(MAX_INTERVAL) + internal_service_policy_with_max(INITIAL_INTERVAL, MAX_INTERVAL) +} + +/// Return a backoff policy for querying conditions that are expected to +/// complete in a relatively shorter amount of time than +/// [internal_service_policy_long]. +pub fn internal_service_policy_short() -> ::backoff::ExponentialBackoff { + const INITIAL_INTERVAL: Duration = Duration::from_millis(50); + const MAX_INTERVAL: Duration = Duration::from_secs(1); + internal_service_policy_with_max(INITIAL_INTERVAL, MAX_INTERVAL) } -pub fn internal_service_policy_with_max( - max_duration: Duration, +fn internal_service_policy_with_max( + initial_interval: Duration, + max_interval: Duration, ) -> ::backoff::ExponentialBackoff { - const INITIAL_INTERVAL: Duration = Duration::from_millis(250); + let current_interval = initial_interval; ::backoff::ExponentialBackoff { - current_interval: INITIAL_INTERVAL, - initial_interval: INITIAL_INTERVAL, + current_interval, + initial_interval, multiplier: 2.0, - max_interval: max_duration, + max_interval, max_elapsed_time: None, ..backoff::ExponentialBackoff::default() } diff --git a/nexus/src/app/oximeter.rs b/nexus/src/app/oximeter.rs index e59a67c4507..698195bc72a 100644 --- a/nexus/src/app/oximeter.rs +++ b/nexus/src/app/oximeter.rs @@ -172,7 +172,7 @@ impl super::Nexus { ); }; backoff::retry_notify( - backoff::internal_service_policy(), + backoff::internal_service_policy_long(), register, log_registration_failure, ).await diff --git a/nexus/src/app/sagas/common_storage.rs b/nexus/src/app/sagas/common_storage.rs index 2a692f545e8..6ce71387035 100644 --- a/nexus/src/app/sagas/common_storage.rs +++ b/nexus/src/app/sagas/common_storage.rs @@ -69,7 +69,7 @@ pub async fn ensure_region_in_dataset( }; let region = backoff::retry_notify( - backoff::internal_service_policy(), + backoff::internal_service_policy_long(), create_region, log_create_failure, ) diff --git a/nexus/src/db/saga_recovery.rs b/nexus/src/db/saga_recovery.rs index 8f876fb0713..44abb510327 100644 --- a/nexus/src/db/saga_recovery.rs +++ b/nexus/src/db/saga_recovery.rs @@ -9,7 +9,7 @@ use crate::db; use futures::{future::BoxFuture, TryFutureExt}; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; -use omicron_common::backoff::internal_service_policy; +use omicron_common::backoff::internal_service_policy_long; use omicron_common::backoff::retry_notify; use omicron_common::backoff::BackoffError; use std::future::Future; @@ -92,7 +92,7 @@ where // (pages) goes up. We'd be much more likely to finish the overall // operation if we didn't throw away the results we did get each time. let found_sagas = retry_notify( - internal_service_policy(), + internal_service_policy_long(), || async { list_unfinished_sagas(&opctx, &datastore, &sec_id) .await diff --git a/nexus/src/populate.rs b/nexus/src/populate.rs index 062e314c219..300c8102a7c 100644 --- a/nexus/src/populate.rs +++ b/nexus/src/populate.rs @@ -100,7 +100,7 @@ async fn populate( ) -> Result<(), String> { for p in *ALL_POPULATORS { let db_result = backoff::retry_notify( - backoff::internal_service_policy(), + backoff::internal_service_policy_long(), || async { p.populate(opctx, datastore, args).await.map_err(|error| { match &error { diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index 22a96cde435..e5417a17afb 100644 --- a/nexus/tests/integration_tests/disks.rs +++ b/nexus/tests/integration_tests/disks.rs @@ -1154,7 +1154,7 @@ async fn query_for_metrics_until_they_exist( path: &str, ) -> ResultsPage { backoff::retry_notify( - backoff::internal_service_policy(), + backoff::internal_service_policy_short(), || async { let measurements: ResultsPage = objects_list_page_authz(client, path).await; diff --git a/oximeter/collector/src/lib.rs b/oximeter/collector/src/lib.rs index e25434af780..68a39ed89be 100644 --- a/oximeter/collector/src/lib.rs +++ b/oximeter/collector/src/lib.rs @@ -443,7 +443,7 @@ impl Oximeter { ); }; let agent = backoff::retry_notify( - backoff::internal_service_policy(), + backoff::internal_service_policy_short(), make_agent, log_client_failure, ) @@ -503,7 +503,7 @@ impl Oximeter { ); }; backoff::retry_notify( - backoff::internal_service_policy(), + backoff::internal_service_policy_short(), notify_nexus, log_notification_failure, ) diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index cb0be2a82a8..8dc4fcb59d4 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -23,7 +23,7 @@ use crate::sp::SpHandle; use omicron_common::address::Ipv6Subnet; use omicron_common::api::external::{Error as ExternalError, MacAddr}; use omicron_common::backoff::{ - internal_service_policy, retry_notify, BackoffError, + internal_service_policy_short, retry_notify, BackoffError, }; use serde::{Deserialize, Serialize}; use slog::Logger; @@ -331,7 +331,7 @@ impl Agent { ) -> Result { let ddm_admin_client = DdmAdminClient::new(self.log.clone())?; let rack_secret = retry_notify( - internal_service_policy(), + internal_service_policy_short(), || async { let other_agents = { // Manually build up a `HashSet` instead of `.collect()`ing diff --git a/sled-agent/src/bootstrap/ddm_admin_client.rs b/sled-agent/src/bootstrap/ddm_admin_client.rs index 74986105683..e3a7383c1cf 100644 --- a/sled-agent/src/bootstrap/ddm_admin_client.rs +++ b/sled-agent/src/bootstrap/ddm_admin_client.rs @@ -8,7 +8,7 @@ use ddm_admin_client::types::Ipv6Prefix; use ddm_admin_client::Client; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; -use omicron_common::backoff::internal_service_policy; +use omicron_common::backoff::internal_service_policy_short; use omicron_common::backoff::retry_notify; use slog::Logger; use std::net::Ipv6Addr; @@ -65,7 +65,7 @@ impl DdmAdminClient { tokio::spawn(async move { let prefix = Ipv6Prefix { addr: address.net().network(), mask: SLED_PREFIX }; - retry_notify(internal_service_policy(), || async { + retry_notify(internal_service_policy_short(), || async { info!( me.log, "Sending prefix to ddmd for advertisement"; "prefix" => ?prefix, diff --git a/sled-agent/src/bootstrap/rss_handle.rs b/sled-agent/src/bootstrap/rss_handle.rs index a506020b7da..dc899b66231 100644 --- a/sled-agent/src/bootstrap/rss_handle.rs +++ b/sled-agent/src/bootstrap/rss_handle.rs @@ -12,7 +12,7 @@ use crate::rack_setup::service::RackSetupService; use crate::sp::SpHandle; use futures::stream::FuturesUnordered; use futures::StreamExt; -use omicron_common::backoff::internal_service_policy; +use omicron_common::backoff::internal_service_policy_short; use omicron_common::backoff::retry_notify; use omicron_common::backoff::BackoffError; use slog::Logger; @@ -101,8 +101,12 @@ async fn initialize_sled_agent( let log_failure = |error, _| { warn!(log, "failed to start sled agent"; "error" => ?error); }; - retry_notify(internal_service_policy(), sled_agent_initialize, log_failure) - .await?; + retry_notify( + internal_service_policy_short(), + sled_agent_initialize, + log_failure, + ) + .await?; info!(log, "Peer agent initialized"; "peer" => %bootstrap_addr); Ok(()) } diff --git a/sled-agent/src/illumos/svc.rs b/sled-agent/src/illumos/svc.rs index 6d42ea0b400..e4fd1669106 100644 --- a/sled-agent/src/illumos/svc.rs +++ b/sled-agent/src/illumos/svc.rs @@ -32,7 +32,7 @@ mod inner { let log_notification_failure = |_error, _delay| {}; backoff::retry_notify( - backoff::internal_service_policy(), + backoff::internal_service_policy_short(), || async { let mut p = smf::Properties::new(); let properties = { diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 05f26b974dd..46fd28171c1 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -114,7 +114,7 @@ async fn wait_for_http_server( }; backoff::retry_notify( - backoff::internal_service_policy(), + backoff::internal_service_policy_short(), || async { // This request is nonsensical - we don't expect an instance to // exist - but getting a response that isn't a connection-based @@ -610,7 +610,7 @@ impl Instance { inner.log, "Adding service"; "smf_name" => &smf_instance_name ); backoff::retry_notify( - backoff::internal_service_policy(), + backoff::internal_service_policy_short(), || async { running_zone .run_cmd(&[ diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index ee0ab306787..a65fc0a5240 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -16,7 +16,7 @@ use omicron_common::address::{ get_sled_address, ReservedRackSubnet, DNS_PORT, DNS_SERVER_PORT, }; use omicron_common::backoff::{ - internal_service_policy, retry_notify, BackoffError, + internal_service_policy_short, retry_notify, BackoffError, }; use serde::{Deserialize, Serialize}; use slog::Logger; @@ -201,7 +201,7 @@ impl ServiceInner { warn!(self.log, "failed to create filesystem"; "error" => ?error); }; retry_notify( - internal_service_policy(), + internal_service_policy_short(), filesystem_put, log_failure, ) @@ -249,8 +249,12 @@ impl ServiceInner { let log_failure = |error, _| { warn!(self.log, "failed to initialize services"; "error" => ?error); }; - retry_notify(internal_service_policy(), services_put, log_failure) - .await?; + retry_notify( + internal_service_policy_short(), + services_put, + log_failure, + ) + .await?; Ok(()) } @@ -400,10 +404,7 @@ impl ServiceInner { ) -> Result, DdmError> { let ddm_admin_client = DdmAdminClient::new(self.log.clone())?; let addrs = retry_notify( - // TODO-correctness `internal_service_policy()` has potentially-long - // exponential backoff, which is probably not what we want. See - // https://github.com/oxidecomputer/omicron/issues/1270 - internal_service_policy(), + internal_service_policy_short(), || async { let peer_addrs = ddm_admin_client.peer_addrs().await.map_err(|err| { @@ -450,7 +451,7 @@ impl ServiceInner { ); }, ) - // `internal_service_policy()` retries indefinitely on transient errors + // `internal_service_policy_short()` retries indefinitely on transient errors // (the only kind we produce), allowing us to `.unwrap()` without // panicking .await diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index 59c8ccb01aa..46d3b2a7fcf 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -11,7 +11,7 @@ use crate::nexus::NexusClient; use crucible_agent_client::types::State as RegionState; use omicron_common::backoff::{ - internal_service_policy, retry_notify, BackoffError, + internal_service_policy_short, retry_notify, BackoffError, }; use slog::{Drain, Logger}; use std::sync::Arc; @@ -86,7 +86,7 @@ impl Server { "error" => ?error); }; retry_notify( - internal_service_policy(), + internal_service_policy_short(), notify_nexus, log_notification_failure, ) diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 60ef5c68826..015e5d91586 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -33,7 +33,7 @@ use omicron_common::api::{ internal::nexus::UpdateArtifact, }; use omicron_common::backoff::{ - internal_service_policy_with_max, retry_notify, BackoffError, + internal_service_policy_short, retry_notify, BackoffError, }; use slog::Logger; use std::net::{Ipv6Addr, SocketAddrV6}; @@ -524,9 +524,7 @@ impl SledAgent { ); }; retry_notify( - internal_service_policy_with_max( - std::time::Duration::from_secs(1), - ), + internal_service_policy_short(), notify_nexus, log_notification_failure, ) diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 37401eaf73a..2113ecfe2c5 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -317,7 +317,7 @@ impl DatasetInfo { warn!(log, "cockroachdb not yet alive"); }; backoff::retry_notify( - backoff::internal_service_policy(), + backoff::internal_service_policy_short(), check_health, log_failure, ) @@ -659,7 +659,7 @@ impl StorageWorker { }; nexus_notifications.push_back( backoff::retry_notify( - backoff::internal_service_policy(), + backoff::internal_service_policy_short(), notify_nexus, log_post_failure, ) @@ -705,7 +705,7 @@ impl StorageWorker { }; nexus_notifications.push_back( backoff::retry_notify( - backoff::internal_service_policy(), + backoff::internal_service_policy_short(), notify_nexus, log_post_failure, ) From 2d7373b87ac78b2744a8a585a7296ad5280051f9 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 30 Nov 2022 11:23:58 -0500 Subject: [PATCH 2/2] rename --- common/src/backoff.rs | 6 ++--- nexus/src/app/oximeter.rs | 2 +- nexus/src/app/sagas/common_storage.rs | 2 +- nexus/src/db/saga_recovery.rs | 4 ++-- nexus/src/populate.rs | 2 +- nexus/tests/integration_tests/disks.rs | 2 +- oximeter/collector/src/lib.rs | 4 ++-- sled-agent/src/bootstrap/agent.rs | 6 ++--- sled-agent/src/bootstrap/ddm_admin_client.rs | 4 ++-- sled-agent/src/bootstrap/rss_handle.rs | 10 +++------ sled-agent/src/illumos/svc.rs | 2 +- sled-agent/src/instance.rs | 4 ++-- sled-agent/src/rack_setup/service.rs | 23 +++++--------------- sled-agent/src/sim/server.rs | 6 ++--- sled-agent/src/sled_agent.rs | 6 ++--- sled-agent/src/storage_manager.rs | 6 ++--- 16 files changed, 34 insertions(+), 55 deletions(-) diff --git a/common/src/backoff.rs b/common/src/backoff.rs index a1776e29276..80ce980cfc5 100644 --- a/common/src/backoff.rs +++ b/common/src/backoff.rs @@ -12,7 +12,7 @@ pub use ::backoff::{backoff::Backoff, ExponentialBackoff, Notify}; /// Return a backoff policy for querying internal services which may not be up /// for a relatively long amount of time. -pub fn internal_service_policy_long() -> ::backoff::ExponentialBackoff { +pub fn retry_policy_long() -> ::backoff::ExponentialBackoff { const INITIAL_INTERVAL: Duration = Duration::from_millis(250); const MAX_INTERVAL: Duration = Duration::from_secs(60 * 60); internal_service_policy_with_max(INITIAL_INTERVAL, MAX_INTERVAL) @@ -20,8 +20,8 @@ pub fn internal_service_policy_long() -> ::backoff::ExponentialBackoff { /// Return a backoff policy for querying conditions that are expected to /// complete in a relatively shorter amount of time than -/// [internal_service_policy_long]. -pub fn internal_service_policy_short() -> ::backoff::ExponentialBackoff { +/// [retry_policy_long]. +pub fn retry_policy_short() -> ::backoff::ExponentialBackoff { const INITIAL_INTERVAL: Duration = Duration::from_millis(50); const MAX_INTERVAL: Duration = Duration::from_secs(1); internal_service_policy_with_max(INITIAL_INTERVAL, MAX_INTERVAL) diff --git a/nexus/src/app/oximeter.rs b/nexus/src/app/oximeter.rs index 698195bc72a..1d72aa6b999 100644 --- a/nexus/src/app/oximeter.rs +++ b/nexus/src/app/oximeter.rs @@ -172,7 +172,7 @@ impl super::Nexus { ); }; backoff::retry_notify( - backoff::internal_service_policy_long(), + backoff::retry_policy_long(), register, log_registration_failure, ).await diff --git a/nexus/src/app/sagas/common_storage.rs b/nexus/src/app/sagas/common_storage.rs index 6ce71387035..2a9444784b4 100644 --- a/nexus/src/app/sagas/common_storage.rs +++ b/nexus/src/app/sagas/common_storage.rs @@ -69,7 +69,7 @@ pub async fn ensure_region_in_dataset( }; let region = backoff::retry_notify( - backoff::internal_service_policy_long(), + backoff::retry_policy_long(), create_region, log_create_failure, ) diff --git a/nexus/src/db/saga_recovery.rs b/nexus/src/db/saga_recovery.rs index 44abb510327..41df37d085d 100644 --- a/nexus/src/db/saga_recovery.rs +++ b/nexus/src/db/saga_recovery.rs @@ -9,8 +9,8 @@ use crate::db; use futures::{future::BoxFuture, TryFutureExt}; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; -use omicron_common::backoff::internal_service_policy_long; use omicron_common::backoff::retry_notify; +use omicron_common::backoff::retry_policy_long; use omicron_common::backoff::BackoffError; use std::future::Future; use std::pin::Pin; @@ -92,7 +92,7 @@ where // (pages) goes up. We'd be much more likely to finish the overall // operation if we didn't throw away the results we did get each time. let found_sagas = retry_notify( - internal_service_policy_long(), + retry_policy_long(), || async { list_unfinished_sagas(&opctx, &datastore, &sec_id) .await diff --git a/nexus/src/populate.rs b/nexus/src/populate.rs index 300c8102a7c..dcf0de43b7a 100644 --- a/nexus/src/populate.rs +++ b/nexus/src/populate.rs @@ -100,7 +100,7 @@ async fn populate( ) -> Result<(), String> { for p in *ALL_POPULATORS { let db_result = backoff::retry_notify( - backoff::internal_service_policy_long(), + backoff::retry_policy_long(), || async { p.populate(opctx, datastore, args).await.map_err(|error| { match &error { diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index e5417a17afb..be5d5a03d3b 100644 --- a/nexus/tests/integration_tests/disks.rs +++ b/nexus/tests/integration_tests/disks.rs @@ -1154,7 +1154,7 @@ async fn query_for_metrics_until_they_exist( path: &str, ) -> ResultsPage { backoff::retry_notify( - backoff::internal_service_policy_short(), + backoff::retry_policy_short(), || async { let measurements: ResultsPage = objects_list_page_authz(client, path).await; diff --git a/oximeter/collector/src/lib.rs b/oximeter/collector/src/lib.rs index 68a39ed89be..ce1dcaaf0a4 100644 --- a/oximeter/collector/src/lib.rs +++ b/oximeter/collector/src/lib.rs @@ -443,7 +443,7 @@ impl Oximeter { ); }; let agent = backoff::retry_notify( - backoff::internal_service_policy_short(), + backoff::retry_policy_short(), make_agent, log_client_failure, ) @@ -503,7 +503,7 @@ impl Oximeter { ); }; backoff::retry_notify( - backoff::internal_service_policy_short(), + backoff::retry_policy_short(), notify_nexus, log_notification_failure, ) diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index 8dc4fcb59d4..9db71329e8a 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -22,9 +22,7 @@ use crate::server::Server as SledServer; use crate::sp::SpHandle; use omicron_common::address::Ipv6Subnet; use omicron_common::api::external::{Error as ExternalError, MacAddr}; -use omicron_common::backoff::{ - internal_service_policy_short, retry_notify, BackoffError, -}; +use omicron_common::backoff::{retry_notify, retry_policy_short, BackoffError}; use serde::{Deserialize, Serialize}; use slog::Logger; use std::borrow::Cow; @@ -331,7 +329,7 @@ impl Agent { ) -> Result { let ddm_admin_client = DdmAdminClient::new(self.log.clone())?; let rack_secret = retry_notify( - internal_service_policy_short(), + retry_policy_short(), || async { let other_agents = { // Manually build up a `HashSet` instead of `.collect()`ing diff --git a/sled-agent/src/bootstrap/ddm_admin_client.rs b/sled-agent/src/bootstrap/ddm_admin_client.rs index e3a7383c1cf..70a7208b9bd 100644 --- a/sled-agent/src/bootstrap/ddm_admin_client.rs +++ b/sled-agent/src/bootstrap/ddm_admin_client.rs @@ -8,8 +8,8 @@ use ddm_admin_client::types::Ipv6Prefix; use ddm_admin_client::Client; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; -use omicron_common::backoff::internal_service_policy_short; use omicron_common::backoff::retry_notify; +use omicron_common::backoff::retry_policy_short; use slog::Logger; use std::net::Ipv6Addr; use std::net::SocketAddr; @@ -65,7 +65,7 @@ impl DdmAdminClient { tokio::spawn(async move { let prefix = Ipv6Prefix { addr: address.net().network(), mask: SLED_PREFIX }; - retry_notify(internal_service_policy_short(), || async { + retry_notify(retry_policy_short(), || async { info!( me.log, "Sending prefix to ddmd for advertisement"; "prefix" => ?prefix, diff --git a/sled-agent/src/bootstrap/rss_handle.rs b/sled-agent/src/bootstrap/rss_handle.rs index dc899b66231..dec161ac49f 100644 --- a/sled-agent/src/bootstrap/rss_handle.rs +++ b/sled-agent/src/bootstrap/rss_handle.rs @@ -12,8 +12,8 @@ use crate::rack_setup::service::RackSetupService; use crate::sp::SpHandle; use futures::stream::FuturesUnordered; use futures::StreamExt; -use omicron_common::backoff::internal_service_policy_short; use omicron_common::backoff::retry_notify; +use omicron_common::backoff::retry_policy_short; use omicron_common::backoff::BackoffError; use slog::Logger; use sprockets_host::Ed25519Certificate; @@ -101,12 +101,8 @@ async fn initialize_sled_agent( let log_failure = |error, _| { warn!(log, "failed to start sled agent"; "error" => ?error); }; - retry_notify( - internal_service_policy_short(), - sled_agent_initialize, - log_failure, - ) - .await?; + retry_notify(retry_policy_short(), sled_agent_initialize, log_failure) + .await?; info!(log, "Peer agent initialized"; "peer" => %bootstrap_addr); Ok(()) } diff --git a/sled-agent/src/illumos/svc.rs b/sled-agent/src/illumos/svc.rs index e4fd1669106..862306e2c09 100644 --- a/sled-agent/src/illumos/svc.rs +++ b/sled-agent/src/illumos/svc.rs @@ -32,7 +32,7 @@ mod inner { let log_notification_failure = |_error, _delay| {}; backoff::retry_notify( - backoff::internal_service_policy_short(), + backoff::retry_policy_short(), || async { let mut p = smf::Properties::new(); let properties = { diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 46fd28171c1..7bbfcf6aee9 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -114,7 +114,7 @@ async fn wait_for_http_server( }; backoff::retry_notify( - backoff::internal_service_policy_short(), + backoff::retry_policy_short(), || async { // This request is nonsensical - we don't expect an instance to // exist - but getting a response that isn't a connection-based @@ -610,7 +610,7 @@ impl Instance { inner.log, "Adding service"; "smf_name" => &smf_instance_name ); backoff::retry_notify( - backoff::internal_service_policy_short(), + backoff::retry_policy_short(), || async { running_zone .run_cmd(&[ diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index a65fc0a5240..5e84ec24fe5 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -15,9 +15,7 @@ use internal_dns_client::multiclient::{DnsError, Updater as DnsUpdater}; use omicron_common::address::{ get_sled_address, ReservedRackSubnet, DNS_PORT, DNS_SERVER_PORT, }; -use omicron_common::backoff::{ - internal_service_policy_short, retry_notify, BackoffError, -}; +use omicron_common::backoff::{retry_notify, retry_policy_short, BackoffError}; use serde::{Deserialize, Serialize}; use slog::Logger; use sprockets_host::Ed25519Certificate; @@ -200,12 +198,8 @@ impl ServiceInner { let log_failure = |error, _| { warn!(self.log, "failed to create filesystem"; "error" => ?error); }; - retry_notify( - internal_service_policy_short(), - filesystem_put, - log_failure, - ) - .await?; + retry_notify(retry_policy_short(), filesystem_put, log_failure) + .await?; } Ok(()) } @@ -249,12 +243,7 @@ impl ServiceInner { let log_failure = |error, _| { warn!(self.log, "failed to initialize services"; "error" => ?error); }; - retry_notify( - internal_service_policy_short(), - services_put, - log_failure, - ) - .await?; + retry_notify(retry_policy_short(), services_put, log_failure).await?; Ok(()) } @@ -404,7 +393,7 @@ impl ServiceInner { ) -> Result, DdmError> { let ddm_admin_client = DdmAdminClient::new(self.log.clone())?; let addrs = retry_notify( - internal_service_policy_short(), + retry_policy_short(), || async { let peer_addrs = ddm_admin_client.peer_addrs().await.map_err(|err| { @@ -451,7 +440,7 @@ impl ServiceInner { ); }, ) - // `internal_service_policy_short()` retries indefinitely on transient errors + // `retry_policy_short()` retries indefinitely on transient errors // (the only kind we produce), allowing us to `.unwrap()` without // panicking .await diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index 46d3b2a7fcf..3a691fb93be 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -10,9 +10,7 @@ use super::sled_agent::SledAgent; use crate::nexus::NexusClient; use crucible_agent_client::types::State as RegionState; -use omicron_common::backoff::{ - internal_service_policy_short, retry_notify, BackoffError, -}; +use omicron_common::backoff::{retry_notify, retry_policy_short, BackoffError}; use slog::{Drain, Logger}; use std::sync::Arc; @@ -86,7 +84,7 @@ impl Server { "error" => ?error); }; retry_notify( - internal_service_policy_short(), + retry_policy_short(), notify_nexus, log_notification_failure, ) diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 015e5d91586..527e6d3e685 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -32,9 +32,7 @@ use omicron_common::api::{ internal::nexus::DiskRuntimeState, internal::nexus::InstanceRuntimeState, internal::nexus::UpdateArtifact, }; -use omicron_common::backoff::{ - internal_service_policy_short, retry_notify, BackoffError, -}; +use omicron_common::backoff::{retry_notify, retry_policy_short, BackoffError}; use slog::Logger; use std::net::{Ipv6Addr, SocketAddrV6}; use std::process::Command; @@ -524,7 +522,7 @@ impl SledAgent { ); }; retry_notify( - internal_service_policy_short(), + retry_policy_short(), notify_nexus, log_notification_failure, ) diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 2113ecfe2c5..94cf44bb5d8 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -317,7 +317,7 @@ impl DatasetInfo { warn!(log, "cockroachdb not yet alive"); }; backoff::retry_notify( - backoff::internal_service_policy_short(), + backoff::retry_policy_short(), check_health, log_failure, ) @@ -659,7 +659,7 @@ impl StorageWorker { }; nexus_notifications.push_back( backoff::retry_notify( - backoff::internal_service_policy_short(), + backoff::retry_policy_short(), notify_nexus, log_post_failure, ) @@ -705,7 +705,7 @@ impl StorageWorker { }; nexus_notifications.push_back( backoff::retry_notify( - backoff::internal_service_policy_short(), + backoff::retry_policy_short(), notify_nexus, log_post_failure, )