diff --git a/common/src/api/external/error.rs b/common/src/api/external/error.rs index dc1459f46aa..c50612c53fa 100644 --- a/common/src/api/external/error.rs +++ b/common/src/api/external/error.rs @@ -61,6 +61,9 @@ pub enum Error { #[error("Type version mismatch! {internal_message}")] TypeVersionMismatch { internal_message: String }, + + #[error("Too many requests! {internal_message}")] + TooManyRequests { internal_message: String }, } /// Indicates how an object was looked up (for an `ObjectNotFound` error) @@ -108,7 +111,8 @@ impl Error { /// retried pub fn retryable(&self) -> bool { match self { - Error::ServiceUnavailable { .. } => true, + Error::ServiceUnavailable { .. } + | Error::TooManyRequests { .. } => true, Error::ObjectNotFound { .. } | Error::ObjectAlreadyExists { .. } @@ -174,6 +178,15 @@ impl Error { Error::TypeVersionMismatch { internal_message: message.to_owned() } } + /// Generates an [`Error::TooManyRequests`] with a specific message. + /// + /// TooManyRequests is returned when the control plane can't keep up with + /// requests, but there wasn't an associated error. It's meant to signal to + /// users that the request can be re-attempted. + pub fn too_many_requests(message: &str) -> Error { + Error::TooManyRequests { internal_message: message.to_owned() } + } + /// Given an [`Error`] with an internal message, return the same error with /// `context` prepended to it to provide more context /// @@ -188,6 +201,7 @@ impl Error { | Error::InvalidRequest { .. } | Error::InvalidValue { .. } | Error::Forbidden => self, + Error::Unauthenticated { internal_message } => { Error::Unauthenticated { internal_message: format!( @@ -223,6 +237,14 @@ impl Error { ), } } + Error::TooManyRequests { internal_message } => { + Error::TooManyRequests { + internal_message: format!( + "{}: {}", + context, internal_message + ), + } + } } } } @@ -317,6 +339,14 @@ impl From for HttpError { Error::TypeVersionMismatch { internal_message } => { HttpError::for_internal_error(internal_message) } + + Error::TooManyRequests { internal_message } => { + HttpError::for_client_error( + Some(String::from("TooManyRequests")), + http::StatusCode::TOO_MANY_REQUESTS, + internal_message, + ) + } } } } diff --git a/nexus/src/app/session.rs b/nexus/src/app/session.rs index 7ac23cf5ce3..2ce612df3c1 100644 --- a/nexus/src/app/session.rs +++ b/nexus/src/app/session.rs @@ -155,7 +155,8 @@ impl super::Nexus { | Error::InternalError { .. } | Error::ServiceUnavailable { .. } | Error::MethodNotAllowed { .. } - | Error::TypeVersionMismatch { .. } => { + | Error::TypeVersionMismatch { .. } + | Error::TooManyRequests { .. } => { Reason::UnknownError { source: error } } })?; diff --git a/nexus/src/db/datastore/region.rs b/nexus/src/db/datastore/region.rs index ad354b7bb8e..41df536acfa 100644 --- a/nexus/src/db/datastore/region.rs +++ b/nexus/src/db/datastore/region.rs @@ -20,7 +20,10 @@ use crate::db::model::Zpool; use crate::external_api::params; use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; +use async_bb8_diesel::{ConnectionError, PoolError}; use diesel::prelude::*; +use diesel::result::DatabaseErrorKind as DieselDatabaseErrorKind; +use diesel::result::Error as DieselError; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; use uuid::Uuid; @@ -314,6 +317,47 @@ impl DataStore { ) => Error::unavail( "Not enough available space to allocate disks", ), + TxnError::Pool(PoolError::Connection( + ConnectionError::Query(DieselError::DatabaseError( + DieselDatabaseErrorKind::SerializationFailure, + ref boxed_error, + )), + )) => { + // Extract the boxed_error from the serialization failure, + // and check if cockroachdb returned a case where the + // transaction didn't fail, but needs to be retried. + // + // From https://www.cockroachlabs.com/docs/v21.2/transactions#client-side-intervention: + // + // To indicate that a transaction must be retried, + // CockroachDB signals an error with the code 40001 and an + // error message that begins with the string "retry + // transaction". + // + // Note I also saw: + // + // restart transaction: TransactionRetryWithProtoRefreshError: TransactionRetryError: retry txn (RETRY_SERIALIZABLE ... + // + // with a link to https://www.cockroachlabs.com/docs/v21.2/transaction-retry-error-reference.html#retry_serializable + // + // Check for "restart transaction" and "retry transaction" + // here, and let users know when too many transactions are + // in flight and causing retryable failures. + if boxed_error.message().starts_with("restart transaction") + || boxed_error + .message() + .starts_with("retry transaction") + { + Error::too_many_requests("transaction must be retried") + } else { + // other types of serialization failure should be + // considered an internal error + Error::internal_error(&format!( + "Transaction error: {}", + e + )) + } + } _ => { Error::internal_error(&format!("Transaction error: {}", e)) }