Skip to content

Commit

Permalink
Ctrl-C graceful shutdown during burnchain initialization + retry logic when adding a bootstrap node
Browse files Browse the repository at this point in the history
  • Loading branch information
Marzi authored and 8marz8 committed Mar 4, 2024
1 parent 0a4a6d3 commit 8adedb5
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 15 deletions.
4 changes: 4 additions & 0 deletions stackslib/src/burnchains/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,8 @@ pub enum Error {
UnknownBlock(BurnchainHeaderHash),
NonCanonicalPoxId(PoxId, PoxId),
CoordinatorClosed,
/// Graceful shutdown error
ShutdownInitiated,
}

impl fmt::Display for Error {
Expand All @@ -706,6 +708,7 @@ impl fmt::Display for Error {
parent, child
),
Error::CoordinatorClosed => write!(f, "ChainsCoordinator channel hung up"),
Error::ShutdownInitiated => write!(f, "Graceful shutdown was initiated"),
}
}
}
Expand All @@ -728,6 +731,7 @@ impl error::Error for Error {
Error::UnknownBlock(_) => None,
Error::NonCanonicalPoxId(_, _) => None,
Error::CoordinatorClosed => None,
Error::ShutdownInitiated => None,
}
}
}
Expand Down
39 changes: 37 additions & 2 deletions testnet/stacks-node/src/config.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use std::collections::HashSet;
use std::fs;
use std::net::{SocketAddr, ToSocketAddrs};
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use std::time::Duration;
use std::{fs, thread};

use clarity::vm::costs::ExecutionCost;
use clarity::vm::types::{AssetIdentifier, PrincipalData, QualifiedContractIdentifier};
Expand Down Expand Up @@ -1927,7 +1927,42 @@ impl NodeConfig {
let pubkey = Secp256k1PublicKey::from_hex(pubkey_str)
.unwrap_or_else(|_| panic!("Invalid public key '{pubkey_str}'"));
debug!("Resolve '{}'", &hostport);
let sockaddr = hostport.to_socket_addrs().unwrap().next().unwrap();

let mut attempts = 0;
let max_attempts = 5;
let mut delay = Duration::from_secs(2);

let sockaddr = loop {
match hostport.to_socket_addrs() {
Ok(mut addrs) => {
if let Some(addr) = addrs.next() {
break addr;
} else {
panic!("No addresses found for '{}'", hostport);
}
}
Err(e) => {
if attempts >= max_attempts {
panic!(
"Failed to resolve '{}' after {} attempts: {}",
hostport, max_attempts, e
);
} else {
error!(
"Attempt {} - Failed to resolve '{}': {}. Retrying in {:?}...",
attempts + 1,
hostport,
e,
delay
);
thread::sleep(delay);
attempts += 1;
delay *= 2;
}
}
}
};

let neighbor = NodeConfig::default_neighbor(sockaddr, pubkey, chain_id, peer_version);
self.bootstrap_node.push(neighbor);
}
Expand Down
18 changes: 16 additions & 2 deletions testnet/stacks-node/src/run_loop/nakamoto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use std::thread::JoinHandle;
use std::{cmp, thread};

use stacks::burnchains::bitcoin::address::{BitcoinAddress, LegacyBitcoinAddressType};
use stacks::burnchains::Burnchain;
use stacks::burnchains::{Burnchain, Error as burnchain_error};
use stacks::chainstate::burn::db::sortdb::SortitionDB;
use stacks::chainstate::burn::BlockSnapshot;
use stacks::chainstate::coordinator::comm::{CoordinatorChannels, CoordinatorReceivers};
Expand Down Expand Up @@ -400,13 +400,27 @@ impl RunLoop {

// setup the termination handler, allow it to error if a prior runloop already set it
neon::RunLoop::setup_termination_handler(self.should_keep_running.clone(), true);
let mut burnchain = neon::RunLoop::instantiate_burnchain_state(

let burnchain_result = neon::RunLoop::instantiate_burnchain_state(
&self.config,
self.should_keep_running.clone(),
burnchain_opt,
coordinator_senders.clone(),
);

let mut burnchain = match burnchain_result {
Ok(burnchain_controller) => burnchain_controller,
Err(burnchain_error::ShutdownInitiated) => {
info!("Exiting stacks-node");
return;
}
Err(e) => {
error!("Error initializing burnchain: {}", e);
info!("Exiting stacks-node");
return;
}
};

let burnchain_config = burnchain.get_burnchain();
self.burnchain = Some(burnchain_config.clone());

Expand Down
45 changes: 34 additions & 11 deletions testnet/stacks-node/src/run_loop/neon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use std::{cmp, thread};

use libc;
use stacks::burnchains::bitcoin::address::{BitcoinAddress, LegacyBitcoinAddressType};
use stacks::burnchains::Burnchain;
use stacks::burnchains::{Burnchain, Error as burnchain_error};
use stacks::chainstate::burn::db::sortdb::SortitionDB;
use stacks::chainstate::burn::BlockSnapshot;
use stacks::chainstate::coordinator::comm::{CoordinatorChannels, CoordinatorReceivers};
Expand All @@ -31,7 +31,7 @@ use stacks_common::util::{get_epoch_time_secs, sleep_ms};
use stx_genesis::GenesisData;

use super::RunLoopCallbacks;
use crate::burnchains::make_bitcoin_indexer;
use crate::burnchains::{make_bitcoin_indexer, Error};
use crate::globals::NeonGlobals as Globals;
use crate::monitoring::{start_serving_monitoring_metrics, MonitoringError};
use crate::neon_node::{StacksNode, BLOCK_PROCESSOR_STACK_SIZE, RELAYER_MAX_BUFFER};
Expand Down Expand Up @@ -393,13 +393,13 @@ impl RunLoop {
should_keep_running: Arc<AtomicBool>,
burnchain_opt: Option<Burnchain>,
coordinator_senders: CoordinatorChannels,
) -> BitcoinRegtestController {
) -> Result<BitcoinRegtestController, burnchain_error> {
// Initialize and start the burnchain.
let mut burnchain_controller = BitcoinRegtestController::with_burnchain(
config.clone(),
Some(coordinator_senders),
burnchain_opt,
Some(should_keep_running),
Some(should_keep_running.clone()),
);

let burnchain = burnchain_controller.get_burnchain();
Expand Down Expand Up @@ -448,13 +448,21 @@ impl RunLoop {
}
};

match burnchain_controller.start(Some(target_burnchain_block_height)) {
Ok(_) => {}
Err(e) => {
burnchain_controller
.start(Some(target_burnchain_block_height))
.map_err(|e| {
match e {
Error::CoordinatorClosed => {
if !should_keep_running.load(Ordering::SeqCst) {
info!("Shutdown initiated during burnchain initialization: {}", e);
return burnchain_error::ShutdownInitiated;
}
}
Error::IndexerError(_) => {}
}
error!("Burnchain controller stopped: {}", e);
panic!();
}
};
})?;

// if the chainstate DBs don't exist, this will instantiate them
if let Err(e) = burnchain_controller.connect_dbs() {
Expand All @@ -464,7 +472,7 @@ impl RunLoop {

// TODO (hack) instantiate the sortdb in the burnchain
let _ = burnchain_controller.sortdb_mut();
burnchain_controller
Ok(burnchain_controller)
}

/// Boot up the stacks chainstate.
Expand Down Expand Up @@ -514,6 +522,7 @@ impl RunLoop {
get_bulk_initial_names: Some(Box::new(move || get_names(use_test_genesis_data))),
};

info!("About to call open_and_exec");
let (chain_state_db, receipts) = StacksChainState::open_and_exec(
self.config.is_mainnet(),
self.config.burnchain.chain_id,
Expand Down Expand Up @@ -1007,13 +1016,27 @@ impl RunLoop {
.expect("Run loop already started, can only start once after initialization.");

Self::setup_termination_handler(self.should_keep_running.clone(), false);
let mut burnchain = Self::instantiate_burnchain_state(

let burnchain_result = Self::instantiate_burnchain_state(
&self.config,
self.should_keep_running.clone(),
burnchain_opt,
coordinator_senders.clone(),
);

let mut burnchain = match burnchain_result {
Ok(burnchain_controller) => burnchain_controller,
Err(burnchain_error::ShutdownInitiated) => {
info!("Exiting stacks-node");
return;
}
Err(e) => {
error!("Error initializing burnchain: {}", e);
info!("Exiting stacks-node");
return;
}
};

let burnchain_config = burnchain.get_burnchain();
self.burnchain = Some(burnchain_config.clone());

Expand Down

0 comments on commit 8adedb5

Please sign in to comment.