diff --git a/stackslib/src/burnchains/mod.rs b/stackslib/src/burnchains/mod.rs index c6e42045428..aa3c8332370 100644 --- a/stackslib/src/burnchains/mod.rs +++ b/stackslib/src/burnchains/mod.rs @@ -682,6 +682,8 @@ pub enum Error { UnknownBlock(BurnchainHeaderHash), NonCanonicalPoxId(PoxId, PoxId), CoordinatorClosed, + /// Graceful shutdown error + ShutdownInitiated, } impl fmt::Display for Error { @@ -706,6 +708,7 @@ impl fmt::Display for Error { parent, child ), Error::CoordinatorClosed => write!(f, "ChainsCoordinator channel hung up"), + Error::ShutdownInitiated => write!(f, "Graceful shutdown was initiated"), } } } @@ -728,6 +731,7 @@ impl error::Error for Error { Error::UnknownBlock(_) => None, Error::NonCanonicalPoxId(_, _) => None, Error::CoordinatorClosed => None, + Error::ShutdownInitiated => None, } } } diff --git a/testnet/stacks-node/src/config.rs b/testnet/stacks-node/src/config.rs index 7e2751d7a89..ba771dae6b9 100644 --- a/testnet/stacks-node/src/config.rs +++ b/testnet/stacks-node/src/config.rs @@ -1,9 +1,9 @@ use std::collections::HashSet; -use std::fs; use std::net::{SocketAddr, ToSocketAddrs}; use std::path::PathBuf; use std::sync::{Arc, Mutex}; use std::time::Duration; +use std::{fs, thread}; use clarity::vm::costs::ExecutionCost; use clarity::vm::types::{AssetIdentifier, PrincipalData, QualifiedContractIdentifier}; @@ -1927,7 +1927,42 @@ impl NodeConfig { let pubkey = Secp256k1PublicKey::from_hex(pubkey_str) .unwrap_or_else(|_| panic!("Invalid public key '{pubkey_str}'")); debug!("Resolve '{}'", &hostport); - let sockaddr = hostport.to_socket_addrs().unwrap().next().unwrap(); + + let mut attempts = 0; + let max_attempts = 5; + let mut delay = Duration::from_secs(2); + + let sockaddr = loop { + match hostport.to_socket_addrs() { + Ok(mut addrs) => { + if let Some(addr) = addrs.next() { + break addr; + } else { + panic!("No addresses found for '{}'", hostport); + } + } + Err(e) => { + if attempts >= max_attempts { + panic!( + "Failed to resolve '{}' after {} attempts: {}", + hostport, max_attempts, e + ); + } else { + error!( + "Attempt {} - Failed to resolve '{}': {}. Retrying in {:?}...", + attempts + 1, + hostport, + e, + delay + ); + thread::sleep(delay); + attempts += 1; + delay *= 2; + } + } + } + }; + let neighbor = NodeConfig::default_neighbor(sockaddr, pubkey, chain_id, peer_version); self.bootstrap_node.push(neighbor); } diff --git a/testnet/stacks-node/src/run_loop/nakamoto.rs b/testnet/stacks-node/src/run_loop/nakamoto.rs index 0b3702a994a..dd13b2d32c1 100644 --- a/testnet/stacks-node/src/run_loop/nakamoto.rs +++ b/testnet/stacks-node/src/run_loop/nakamoto.rs @@ -20,7 +20,7 @@ use std::thread::JoinHandle; use std::{cmp, thread}; use stacks::burnchains::bitcoin::address::{BitcoinAddress, LegacyBitcoinAddressType}; -use stacks::burnchains::Burnchain; +use stacks::burnchains::{Burnchain, Error as burnchain_error}; use stacks::chainstate::burn::db::sortdb::SortitionDB; use stacks::chainstate::burn::BlockSnapshot; use stacks::chainstate::coordinator::comm::{CoordinatorChannels, CoordinatorReceivers}; @@ -400,13 +400,27 @@ impl RunLoop { // setup the termination handler, allow it to error if a prior runloop already set it neon::RunLoop::setup_termination_handler(self.should_keep_running.clone(), true); - let mut burnchain = neon::RunLoop::instantiate_burnchain_state( + + let burnchain_result = neon::RunLoop::instantiate_burnchain_state( &self.config, self.should_keep_running.clone(), burnchain_opt, coordinator_senders.clone(), ); + let mut burnchain = match burnchain_result { + Ok(burnchain_controller) => burnchain_controller, + Err(burnchain_error::ShutdownInitiated) => { + info!("Exiting stacks-node"); + return; + } + Err(e) => { + error!("Error initializing burnchain: {}", e); + info!("Exiting stacks-node"); + return; + } + }; + let burnchain_config = burnchain.get_burnchain(); self.burnchain = Some(burnchain_config.clone()); diff --git a/testnet/stacks-node/src/run_loop/neon.rs b/testnet/stacks-node/src/run_loop/neon.rs index 3f5c04f4c2e..f053c58cff0 100644 --- a/testnet/stacks-node/src/run_loop/neon.rs +++ b/testnet/stacks-node/src/run_loop/neon.rs @@ -8,7 +8,7 @@ use std::{cmp, thread}; use libc; use stacks::burnchains::bitcoin::address::{BitcoinAddress, LegacyBitcoinAddressType}; -use stacks::burnchains::Burnchain; +use stacks::burnchains::{Burnchain, Error as burnchain_error}; use stacks::chainstate::burn::db::sortdb::SortitionDB; use stacks::chainstate::burn::BlockSnapshot; use stacks::chainstate::coordinator::comm::{CoordinatorChannels, CoordinatorReceivers}; @@ -31,7 +31,7 @@ use stacks_common::util::{get_epoch_time_secs, sleep_ms}; use stx_genesis::GenesisData; use super::RunLoopCallbacks; -use crate::burnchains::make_bitcoin_indexer; +use crate::burnchains::{make_bitcoin_indexer, Error}; use crate::globals::NeonGlobals as Globals; use crate::monitoring::{start_serving_monitoring_metrics, MonitoringError}; use crate::neon_node::{StacksNode, BLOCK_PROCESSOR_STACK_SIZE, RELAYER_MAX_BUFFER}; @@ -393,13 +393,13 @@ impl RunLoop { should_keep_running: Arc, burnchain_opt: Option, coordinator_senders: CoordinatorChannels, - ) -> BitcoinRegtestController { + ) -> Result { // Initialize and start the burnchain. let mut burnchain_controller = BitcoinRegtestController::with_burnchain( config.clone(), Some(coordinator_senders), burnchain_opt, - Some(should_keep_running), + Some(should_keep_running.clone()), ); let burnchain = burnchain_controller.get_burnchain(); @@ -448,13 +448,21 @@ impl RunLoop { } }; - match burnchain_controller.start(Some(target_burnchain_block_height)) { - Ok(_) => {} - Err(e) => { + burnchain_controller + .start(Some(target_burnchain_block_height)) + .map_err(|e| { + match e { + Error::CoordinatorClosed => { + if !should_keep_running.load(Ordering::SeqCst) { + info!("Shutdown initiated during burnchain initialization: {}", e); + return burnchain_error::ShutdownInitiated; + } + } + Error::IndexerError(_) => {} + } error!("Burnchain controller stopped: {}", e); panic!(); - } - }; + })?; // if the chainstate DBs don't exist, this will instantiate them if let Err(e) = burnchain_controller.connect_dbs() { @@ -464,7 +472,7 @@ impl RunLoop { // TODO (hack) instantiate the sortdb in the burnchain let _ = burnchain_controller.sortdb_mut(); - burnchain_controller + Ok(burnchain_controller) } /// Boot up the stacks chainstate. @@ -514,6 +522,7 @@ impl RunLoop { get_bulk_initial_names: Some(Box::new(move || get_names(use_test_genesis_data))), }; + info!("About to call open_and_exec"); let (chain_state_db, receipts) = StacksChainState::open_and_exec( self.config.is_mainnet(), self.config.burnchain.chain_id, @@ -1007,13 +1016,27 @@ impl RunLoop { .expect("Run loop already started, can only start once after initialization."); Self::setup_termination_handler(self.should_keep_running.clone(), false); - let mut burnchain = Self::instantiate_burnchain_state( + + let burnchain_result = Self::instantiate_burnchain_state( &self.config, self.should_keep_running.clone(), burnchain_opt, coordinator_senders.clone(), ); + let mut burnchain = match burnchain_result { + Ok(burnchain_controller) => burnchain_controller, + Err(burnchain_error::ShutdownInitiated) => { + info!("Exiting stacks-node"); + return; + } + Err(e) => { + error!("Error initializing burnchain: {}", e); + info!("Exiting stacks-node"); + return; + } + }; + let burnchain_config = burnchain.get_burnchain(); self.burnchain = Some(burnchain_config.clone());