Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

validator: Add --wait-for-super-majority to facilitate asynchronous cluster restarts #7701

Merged
merged 3 commits into from Jan 7, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
110 changes: 81 additions & 29 deletions core/src/validator.rs
Expand Up @@ -48,7 +48,8 @@ use std::{
sync::atomic::{AtomicBool, Ordering},
sync::mpsc::Receiver,
sync::{Arc, Mutex, RwLock},
thread::Result,
thread::{sleep, Result},
time::Duration,
};

#[derive(Clone, Debug)]
Expand All @@ -67,6 +68,7 @@ pub struct ValidatorConfig {
pub broadcast_stage_type: BroadcastStageType,
pub partition_cfg: Option<PartitionCfg>,
pub fixed_leader_schedule: Option<FixedSchedule>,
pub wait_for_super_majority: bool,
}

impl Default for ValidatorConfig {
Expand All @@ -86,6 +88,7 @@ impl Default for ValidatorConfig {
broadcast_stage_type: BroadcastStageType::Standard,
partition_cfg: None,
fixed_leader_schedule: None,
wait_for_super_majority: false,
}
}
}
Expand Down Expand Up @@ -138,27 +141,7 @@ impl Validator {

warn!("identity pubkey: {:?}", id);
warn!("vote pubkey: {:?}", vote_account);
warn!(
"CUDA is {}abled",
if solana_perf::perf_libs::api().is_some() {
"en"
} else {
"dis"
}
);

// Validator binaries built on a machine with AVX support will generate invalid opcodes
// when run on machines without AVX causing a non-obvious process abort. Instead detect
// the mismatch and error cleanly.
#[target_feature(enable = "avx")]
{
if is_x86_feature_detected!("avx") {
info!("AVX detected");
} else {
error!("Your machine does not have AVX support, please rebuild from source on your machine");
process::exit(1);
}
}
report_target_features();

info!("entrypoint: {:?}", entrypoint_info_option);

Expand Down Expand Up @@ -293,14 +276,7 @@ impl Validator {
if config.snapshot_config.is_some() {
poh_recorder.set_bank(&bank);
}

let poh_recorder = Arc::new(Mutex::new(poh_recorder));
let poh_service = PohService::new(poh_recorder.clone(), &poh_config, &exit);
assert_eq!(
blocktree.new_shreds_signals.len(),
1,
"New shred signal for the TVU should be the same as the clear bank signal."
);

let ip_echo_server = solana_net_utils::ip_echo_server(node.sockets.ip_echo.unwrap());

Expand All @@ -321,6 +297,22 @@ impl Validator {
.set_entrypoint(entrypoint_info.clone());
}

if config.wait_for_super_majority {
info!(
"Waiting more than 66% of activated stake at slot {} to be in gossip...",
bank.slot()
);
loop {
let gossip_stake_percent = get_stake_percent_in_gossip(&bank, &cluster_info);

info!("{}% of activated stake in gossip", gossip_stake_percent,);
if gossip_stake_percent > 66 {
break;
}
sleep(Duration::new(1, 0));
}
}

let sockets = Sockets {
repair: node
.sockets
Expand Down Expand Up @@ -353,6 +345,13 @@ impl Validator {
Some(voting_keypair.clone())
};

let poh_service = PohService::new(poh_recorder.clone(), &poh_config, &exit);
assert_eq!(
blocktree.new_shreds_signals.len(),
1,
"New shred signal for the TVU should be the same as the clear bank signal."
);

let tvu = Tvu::new(
vote_account,
voting_keypair,
Expand Down Expand Up @@ -584,6 +583,59 @@ pub fn new_validator_for_tests() -> (Validator, ContactInfo, Keypair, PathBuf) {
(node, contact_info, mint_keypair, ledger_path)
}

/// Logs which hardware acceleration features are available and aborts when this binary
/// cannot run on the current CPU.
///
/// * Emits a warning-level line stating whether CUDA is enabled, i.e. whether
///   `solana_perf::perf_libs::api()` returns a value.
/// * On x86/x86_64, checks for AVX support at runtime: logs "AVX detected" when present,
///   otherwise logs an error and terminates the process with exit code 1.
///
/// On every other architecture the AVX check is compiled out entirely, because
/// `is_x86_feature_detected!` refuses to compile for non-x86 targets.
fn report_target_features() {
    warn!(
        "CUDA is {}abled",
        if solana_perf::perf_libs::api().is_some() {
            "en"
        } else {
            "dis"
        }
    );

    // Validator binaries built on a machine with AVX support will generate invalid opcodes
    // when run on machines without AVX causing a non-obvious process abort.  Instead detect
    // the mismatch and error cleanly.
    //
    // The `cfg` guard keeps non-x86 builds compiling; the runtime detection macro only
    // exists for x86 and x86_64.
    // NOTE(review): `target_feature` is documented for functions; confirm it has the
    // intended effect when attached to a block.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "avx")]
    {
        if is_x86_feature_detected!("avx") {
            info!("AVX detected");
        } else {
            error!("Your machine does not have AVX support, please rebuild from source on your machine");
            process::exit(1);
        }
    }
}

/// Returns the percentage (0..=100, rounded down) of the activated stake known to `bank`
/// that belongs to validators currently visible in gossip.
///
/// Every entry of `bank.vote_accounts()` contributes its activated stake to the total.
/// That stake additionally counts as "in gossip" when the `node_pubkey` of the entry's
/// `VoteState` equals the `id` of one of the peers returned by
/// `cluster_info.tvu_peers()`. When `VoteState::from` yields nothing for a vote account,
/// `VoteState::default()` is used in its place.
///
/// Returns 0 when the bank has no activated stake at all, instead of dividing by zero.
///
/// # Panics
///
/// Panics if the `cluster_info` lock is poisoned.
fn get_stake_percent_in_gossip(
    bank: &Arc<solana_runtime::bank::Bank>,
    cluster_info: &Arc<RwLock<ClusterInfo>>,
) -> u64 {
    let mut gossip_stake: u64 = 0;
    let mut total_activated_stake: u64 = 0;
    // Fetch the peer list once; the read guard is a temporary that is released at the end
    // of this statement, so the lock is not held during the loop below.
    let tvu_peers = cluster_info.read().unwrap().tvu_peers();

    for (activated_stake, vote_account) in bank.vote_accounts().values() {
        let vote_state =
            solana_vote_program::vote_state::VoteState::from(&vote_account).unwrap_or_default();
        total_activated_stake += activated_stake;
        if tvu_peers
            .iter()
            .any(|peer| peer.id == vote_state.node_pubkey)
        {
            trace!(
                "observed {} in gossip, (activated_stake={})",
                vote_state.node_pubkey,
                activated_stake
            );
            gossip_stake += activated_stake;
        }
    }

    // A bank without any activated stake has nothing visible in gossip by definition;
    // report 0% rather than panicking on a zero divisor.
    if total_activated_stake == 0 {
        return 0;
    }

    // Widen before multiplying: `gossip_stake * 100` can exceed `u64::MAX` for large
    // stakes. `gossip_stake <= total_activated_stake`, so the quotient is at most 100 and
    // the narrowing cast below cannot truncate.
    let percent = u128::from(gossip_stake) * 100 / u128::from(total_activated_stake);
    percent as u64
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
7 changes: 7 additions & 0 deletions validator/src/main.rs
Expand Up @@ -537,6 +537,12 @@ pub fn main() {
.takes_value(true)
.help("Redirect logging to the specified file, '-' for standard error"),
)
.arg(
Arg::with_name("wait_for_super_majority")
.long("wait-for-super-majority")
mvines marked this conversation as resolved.
Show resolved Hide resolved
.takes_value(false)
.help("After processing the ledger, wait until a super majority of stake is visible on gossip before starting PoH"),
mvines marked this conversation as resolved.
Show resolved Hide resolved
)
.get_matches();

let identity_keypair = Arc::new(
Expand Down Expand Up @@ -582,6 +588,7 @@ pub fn main() {
validator_config.dev_halt_at_slot = value_t!(matches, "dev_halt_at_slot", Slot).ok();

validator_config.rpc_config.enable_validator_exit = matches.is_present("enable_rpc_exit");
validator_config.wait_for_super_majority = matches.is_present("wait_for_super_majority");

validator_config.rpc_config.faucet_addr = matches.value_of("rpc_faucet_addr").map(|address| {
solana_net_utils::parse_host_port(address).expect("failed to parse faucet address")
Expand Down