From b1af20a5add18ab2df08fca05f7d54a78ad7a617 Mon Sep 17 00:00:00 2001 From: Nikolay Kurtov Date: Tue, 25 Oct 2022 14:36:26 +0200 Subject: [PATCH 001/103] Prototype --- chain/client/src/sync.rs | 2 +- chain/network/src/client.rs | 40 +++ chain/network/src/network_protocol/mod.rs | 296 +++++++++++++++++- .../src/network_protocol/network.proto | 2 + chain/network/src/peer/peer_actor.rs | 145 +++++++-- 5 files changed, 445 insertions(+), 40 deletions(-) diff --git a/chain/client/src/sync.rs b/chain/client/src/sync.rs index 22842cace9e..d1c216a935c 100644 --- a/chain/client/src/sync.rs +++ b/chain/client/src/sync.rs @@ -1223,7 +1223,7 @@ impl StateSync { state_split_scheduler: &dyn Fn(StateSplitRequest), ) -> Result { let _span = tracing::debug_span!(target: "sync", "run", sync = "StateSync").entered(); - debug!(target: "sync", %sync_hash, ?new_shard_sync, ?tracking_shards, "syncing state"); + debug!(target: "sync", %sync_hash, ?tracking_shards, "syncing state"); let prev_hash = *chain.get_block_header(&sync_hash)?.prev_hash(); let now = Clock::utc(); diff --git a/chain/network/src/client.rs b/chain/network/src/client.rs index 4361b6ac7e3..4c19f8b711c 100644 --- a/chain/network/src/client.rs +++ b/chain/network/src/client.rs @@ -15,6 +15,7 @@ use near_primitives::sharding::PartialEncodedChunk; use near_primitives::transaction::SignedTransaction; use near_primitives::types::{AccountId, EpochId, ShardId}; use near_primitives::views::FinalExecutionOutcomeView; +use tracing::Instrument; /// A strongly typed asynchronous API for the Client logic. 
/// It abstracts away the fact that client is implemented using actix @@ -43,6 +44,7 @@ impl Client { account_id: AccountId, tx_hash: CryptoHash, ) -> Result, ReasonForBan> { + let _span = tracing::trace_span!(target: "network", "tx_status_request").entered(); match self .view_client_addr .send( @@ -52,6 +54,7 @@ impl Client { } .with_span_context(), ) + .in_current_span() .await { Ok(NetworkViewClientResponses::TxStatus(tx_result)) => Ok(Some(*tx_result)), @@ -68,12 +71,14 @@ impl Client { &self, tx_result: FinalExecutionOutcomeView, ) -> Result<(), ReasonForBan> { + let _span = tracing::trace_span!(target: "network", "tx_status_response").entered(); match self .view_client_addr .send( NetworkViewClientMessages::TxStatusResponse(Box::new(tx_result.clone())) .with_span_context(), ) + .in_current_span() .await { Ok(NetworkViewClientResponses::NoResponse) => Ok(()), @@ -91,6 +96,7 @@ impl Client { shard_id: ShardId, sync_hash: CryptoHash, ) -> Result, ReasonForBan> { + let _span = tracing::trace_span!(target: "network", "state_request_header").entered(); match self .view_client_addr .send( @@ -100,6 +106,7 @@ impl Client { } .with_span_context(), ) + .in_current_span() .await { Ok(NetworkViewClientResponses::StateResponse(resp)) => Ok(Some(*resp)), @@ -119,6 +126,7 @@ impl Client { sync_hash: CryptoHash, part_id: u64, ) -> Result, ReasonForBan> { + let _span = tracing::trace_span!(target: "network", "state_request_part").entered(); match self .view_client_addr .send( @@ -129,6 +137,7 @@ impl Client { } .with_span_context(), ) + .in_current_span() .await { Ok(NetworkViewClientResponses::StateResponse(resp)) => Ok(Some(*resp)), @@ -143,9 +152,11 @@ impl Client { } pub async fn state_response(&self, info: StateResponseInfo) -> Result<(), ReasonForBan> { + let _span = tracing::trace_span!(target: "network", "state_response").entered(); match self .client_addr .send(NetworkClientMessages::StateResponse(info).with_span_context()) + .in_current_span() .await { 
Ok(NetworkClientResponses::NoResponse) => Ok(()), @@ -163,9 +174,11 @@ impl Client { approval: Approval, peer_id: PeerId, ) -> Result<(), ReasonForBan> { + let _span = tracing::trace_span!(target: "network", "block_approval").entered(); match self .client_addr .send(NetworkClientMessages::BlockApproval(approval, peer_id).with_span_context()) + .in_current_span() .await { Ok(NetworkClientResponses::NoResponse) => Ok(()), @@ -183,12 +196,14 @@ impl Client { transaction: SignedTransaction, is_forwarded: bool, ) -> Result<(), ReasonForBan> { + let _span = tracing::trace_span!(target: "network", "transaction").entered(); match self .client_addr .send( NetworkClientMessages::Transaction { transaction, is_forwarded, check_only: false } .with_span_context(), ) + .in_current_span() .await { Ok(NetworkClientResponses::ValidTx) => Ok(()), @@ -211,12 +226,15 @@ impl Client { req: PartialEncodedChunkRequestMsg, msg_hash: CryptoHash, ) -> Result<(), ReasonForBan> { + let _span = + tracing::trace_span!(target: "network", "partial_encoded_chunk_request").entered(); match self .client_addr .send( NetworkClientMessages::PartialEncodedChunkRequest(req, msg_hash) .with_span_context(), ) + .in_current_span() .await { Ok(NetworkClientResponses::NoResponse) => Ok(()), @@ -234,12 +252,15 @@ impl Client { resp: PartialEncodedChunkResponseMsg, timestamp: time::Instant, ) -> Result<(), ReasonForBan> { + let _span = + tracing::trace_span!(target: "network", "partial_encoded_chunk_response").entered(); match self .client_addr .send( NetworkClientMessages::PartialEncodedChunkResponse(resp, timestamp.into()) .with_span_context(), ) + .in_current_span() .await { Ok(NetworkClientResponses::NoResponse) => Ok(()), @@ -256,9 +277,11 @@ impl Client { &self, chunk: PartialEncodedChunk, ) -> Result<(), ReasonForBan> { + let _span = tracing::trace_span!(target: "network", "partial_encoded_chunk").entered(); match self .client_addr 
.send(NetworkClientMessages::PartialEncodedChunk(chunk).with_span_context()) + .in_current_span() .await { Ok(NetworkClientResponses::NoResponse) => Ok(()), @@ -275,9 +298,12 @@ impl Client { &self, msg: PartialEncodedChunkForwardMsg, ) -> Result<(), ReasonForBan> { + let _span = + tracing::trace_span!(target: "network", "partial_encoded_chunk_forward").entered(); match self .client_addr .send(NetworkClientMessages::PartialEncodedChunkForward(msg).with_span_context()) + .in_current_span() .await { Ok(NetworkClientResponses::NoResponse) => Ok(()), @@ -291,9 +317,11 @@ impl Client { } pub async fn block_request(&self, hash: CryptoHash) -> Result, ReasonForBan> { + let _span = tracing::trace_span!(target: "network", "block_request").entered(); match self .view_client_addr .send(NetworkViewClientMessages::BlockRequest(hash).with_span_context()) + .in_current_span() .await { Ok(NetworkViewClientResponses::Block(block)) => Ok(Some(*block)), @@ -311,9 +339,11 @@ impl Client { &self, hashes: Vec, ) -> Result>, ReasonForBan> { + let _span = tracing::trace_span!(target: "network", "block_headers_request").entered(); match self .view_client_addr .send(NetworkViewClientMessages::BlockHeadersRequest(hashes).with_span_context()) + .in_current_span() .await { Ok(NetworkViewClientResponses::BlockHeaders(block_headers)) => Ok(Some(block_headers)), @@ -333,9 +363,11 @@ impl Client { peer_id: PeerId, was_requested: bool, ) -> Result<(), ReasonForBan> { + let _span = tracing::trace_span!(target: "network", "block").entered(); match self .client_addr .send(NetworkClientMessages::Block(block, peer_id, was_requested).with_span_context()) + .in_current_span() .await { Ok(NetworkClientResponses::NoResponse) => Ok(()), @@ -353,9 +385,11 @@ impl Client { headers: Vec, peer_id: PeerId, ) -> Result<(), ReasonForBan> { + let _span = tracing::trace_span!(target: "network", "block_headers").entered(); match self .client_addr .send(NetworkClientMessages::BlockHeaders(headers, 
peer_id).with_span_context()) + .in_current_span() .await { Ok(NetworkClientResponses::NoResponse) => Ok(()), @@ -369,9 +403,11 @@ impl Client { } pub async fn challenge(&self, challenge: Challenge) -> Result<(), ReasonForBan> { + let _span = tracing::trace_span!(target: "network", "challenge").entered(); match self .client_addr .send(NetworkClientMessages::Challenge(challenge).with_span_context()) + .in_current_span() .await { Ok(NetworkClientResponses::NoResponse) => Ok(()), @@ -385,9 +421,11 @@ impl Client { } pub async fn network_info(&self, info: NetworkInfo) { + let _span = tracing::trace_span!(target: "network", "network_info").entered(); match self .client_addr .send(NetworkClientMessages::NetworkInfo(info).with_span_context()) + .in_current_span() .await { Ok(NetworkClientResponses::NoResponse) => {} @@ -400,9 +438,11 @@ impl Client { &self, accounts: Vec<(AnnounceAccount, Option)>, ) -> Result, ReasonForBan> { + let _span = tracing::trace_span!(target: "network", "announce_account").entered(); match self .view_client_addr .send(NetworkViewClientMessages::AnnounceAccount(accounts).with_span_context()) + .in_current_span() .await { Ok(NetworkViewClientResponses::AnnounceAccount(accounts)) => Ok(accounts), diff --git a/chain/network/src/network_protocol/mod.rs b/chain/network/src/network_protocol/mod.rs index ba70f5eb87c..0ef5d3fd122 100644 --- a/chain/network/src/network_protocol/mod.rs +++ b/chain/network/src/network_protocol/mod.rs @@ -21,6 +21,7 @@ pub use _proto::network as proto; use crate::time; use borsh::{BorshDeserialize as _, BorshSerialize as _}; +use itertools::Itertools; use near_crypto::PublicKey; use near_crypto::Signature; use near_primitives::block::{Approval, Block, BlockHeader, GenesisId}; @@ -38,10 +39,16 @@ use near_primitives::types::{AccountId, EpochId}; use near_primitives::types::{BlockHeight, ShardId}; use near_primitives::validator_signer::ValidatorSigner; use near_primitives::views::FinalExecutionOutcomeView; +use 
opentelemetry::propagation::{Extractor, Injector, TextMapPropagator}; +use opentelemetry::trace::{SpanContext, SpanId, TraceContextExt, TraceFlags, TraceId, TraceState}; +use opentelemetry::{Context, ContextGuard}; use protobuf::Message as _; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use std::fmt; +use std::fmt::Debug; +use std::str::Utf8Error; use std::sync::Arc; +use tracing::Span; #[derive(PartialEq, Eq, Clone, Debug, Hash)] pub struct PeerAddr { @@ -262,6 +269,189 @@ pub enum PeerMessage { EpochSyncFinalizationResponse(Box), } +/* +#[derive(Debug, Default)] +struct NodeTraceContextInjectorExtractor { + hash_map: HashMap, +} + +impl Injector for NodeTraceContextInjectorExtractor { + fn set(&mut self, key: &str, value: String) { + tracing::warn!("Injector::set '{}' '{}'", key, value); + self.hash_map.set(key, value) + } +} + +impl Extractor for NodeTraceContextInjectorExtractor { + fn get(&self, key: &str) -> Option<&str> { + tracing::warn!("Extractor::get '{}'", key); + self.hash_map.get(key).map(|x| &**x) + } + + fn keys(&self) -> Vec<&str> { + tracing::warn!("Extractor::keys"); + self.hash_map.keys().map(|x| &**x).collect() + } +} + */ + +const NODE_TRACE_ID_HEADER: &str = "x-node-trace-id"; +const NODE_PARENT_ID_HEADER: &str = "x-node-parent-id"; +const NODE_SAMPLING_PRIORITY_HEADER: &str = "x-node-sampling-priority"; + +const TRACE_FLAG_DEFERRED: TraceFlags = TraceFlags::new(0x02); + +use near_o11y::OpenTelemetrySpanExt; +use once_cell::sync::Lazy; +use opentelemetry::propagation::text_map_propagator::FieldIter; + +static NODE_HEADER_FIELDS: Lazy> = Lazy::new(|| { + vec![ + NODE_TRACE_ID_HEADER.to_string(), + NODE_PARENT_ID_HEADER.to_string(), + NODE_SAMPLING_PRIORITY_HEADER.to_string(), + ] +}); + +enum SamplingPriority { + UserReject = -1, + AutoReject = 0, + AutoKeep = 1, + UserKeep = 2, +} + +#[derive(Debug)] +enum ExtractError { + TraceId, + SpanId, + SamplingPriority, +} + +#[derive(Debug, Default, Clone)] +struct 
NodePropagator {} + +impl NodePropagator { + pub fn new() -> Self { + NodePropagator::default() + } + fn extract_trace_id(&self, trace_id: &str) -> Result { + trace_id + .parse::() + .map(|id| TraceId::from((id as u128).to_be_bytes())) + .map_err(|_| ExtractError::TraceId) + } + + fn extract_span_id(&self, span_id: &str) -> Result { + span_id + .parse::() + .map(|id| SpanId::from(id.to_be_bytes())) + .map_err(|_| ExtractError::SpanId) + } + + fn extract_sampling_priority( + &self, + sampling_priority: &str, + ) -> Result { + let i = sampling_priority.parse::().map_err(|_| ExtractError::SamplingPriority)?; + + match i { + -1 => Ok(SamplingPriority::UserReject), + 0 => Ok(SamplingPriority::AutoReject), + 1 => Ok(SamplingPriority::AutoKeep), + 2 => Ok(SamplingPriority::UserKeep), + _ => Err(ExtractError::SamplingPriority), + } + } + + fn extract_span_context(&self, extractor: &dyn Extractor) -> Result { + let trace_id = self.extract_trace_id(extractor.get(NODE_TRACE_ID_HEADER).unwrap_or(""))?; + // If we have a trace_id but can't get the parent span, we default it to invalid instead of completely erroring + // out so that the rest of the spans aren't completely lost + let span_id = self + .extract_span_id(extractor.get(NODE_PARENT_ID_HEADER).unwrap_or("")) + .unwrap_or(SpanId::INVALID); + let sampling_priority = self + .extract_sampling_priority(extractor.get(NODE_SAMPLING_PRIORITY_HEADER).unwrap_or("")); + let sampled = match sampling_priority { + Ok(SamplingPriority::UserReject) | Ok(SamplingPriority::AutoReject) => { + TraceFlags::default() + } + Ok(SamplingPriority::UserKeep) | Ok(SamplingPriority::AutoKeep) => TraceFlags::SAMPLED, + // Treat the sampling as DEFERRED instead of erroring on extracting the span context + Err(_) => TRACE_FLAG_DEFERRED, + }; + + let trace_state = TraceState::default(); + + Ok(SpanContext::new(trace_id, span_id, sampled, true, trace_state)) + } +} + +impl TextMapPropagator for NodePropagator { + fn inject_context(&self, cx: &Context, 
injector: &mut dyn Injector) { + tracing::warn!("injector cx: '{:#?}', has active span: '{:#?}'", cx, cx.has_active_span()); + let span = cx.span(); + // let span2 = Span::current(); + let span_context = span.span_context(); + if span_context.is_valid() { + tracing::warn!( + "injector cx: '{:#?}', trace_id: '{:#?}', span_id: '{:#?}'", + cx, + span_context.trace_id(), + span_context.span_id() + ); + injector.set( + NODE_TRACE_ID_HEADER, + (u128::from_be_bytes(span_context.trace_id().to_bytes()) as u128).to_string(), + ); + tracing::warn!( + "injector trace_id: {}, from_be_bytes: {}, from_be_bytes_to_string: {}", + span_context.trace_id(), + u128::from_be_bytes(span_context.trace_id().to_bytes()), + (u128::from_be_bytes(span_context.trace_id().to_bytes()) as u128).to_string(), + ); + injector.set( + NODE_PARENT_ID_HEADER, + u64::from_be_bytes(span_context.span_id().to_bytes()).to_string(), + ); + tracing::warn!( + "injector span_id: {}, from_be_bytes: {}, from_be_bytes_to_string: {}", + span_context.span_id(), + u64::from_be_bytes(span_context.span_id().to_bytes()), + (u64::from_be_bytes(span_context.span_id().to_bytes()) as u64).to_string(), + ); + + if span_context.trace_flags() & TRACE_FLAG_DEFERRED != TRACE_FLAG_DEFERRED { + let sampling_priority = if span_context.is_sampled() { + SamplingPriority::AutoKeep + } else { + SamplingPriority::AutoReject + }; + + injector.set(NODE_SAMPLING_PRIORITY_HEADER, (sampling_priority as i32).to_string()); + } + } else { + tracing::warn!( + "span_context invalid trace_id: '{:#?}', span_id: '{:#?}'", + span_context.trace_id(), + span_context.span_id() + ); + } + } + + fn extract_with_context(&self, cx: &Context, extractor: &dyn Extractor) -> Context { + let extracted = + self.extract_span_context(extractor).unwrap_or_else(|_| SpanContext::empty_context()); + tracing::warn!("extracted SpanContext: {:#?}", extracted); + + cx.with_remote_span_context(extracted) + } + + fn fields(&self) -> FieldIter<'_> { + 
FieldIter::new(NODE_HEADER_FIELDS.as_ref()) + } +} + impl fmt::Display for PeerMessage { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(self.msg_variant(), f) @@ -286,11 +476,54 @@ pub enum ParsePeerMessageError { ProtoConv(#[source] proto_conv::ParsePeerMessageError), } +#[derive(Debug, PartialEq)] +struct ValueA(u64); + +#[derive(thiserror::Error, Debug)] +pub enum HeadersError { + #[error("ToStrError")] + ToStrError(#[source] Utf8Error), +} + +fn deserialize_headers(s: &[u8]) -> Result, HeadersError> { + let s = std::str::from_utf8(s).map_err(HeadersError::ToStrError)?; + let res = s + .split(";") + .filter_map(|kv| { + if let Some((k, v)) = kv.split_once("=") { + Some((k.to_owned(), v.to_owned())) + } else { + None + } + }) + .collect(); + Ok(res) +} + +fn serialize_headers(extractor: &dyn Extractor) -> Vec { + let serialized_str = + extractor.keys().iter().map(|k| format!("{}={}", k, extractor.get(k).unwrap())).join(";"); + serialized_str.into_bytes() +} + impl PeerMessage { pub(crate) fn serialize(&self, enc: Encoding) -> Vec { + let _span = tracing::warn_span!(target: "network", "serialize").entered(); + tracing::warn!("PeerMessage::serialize {:#?}", enc); match enc { Encoding::Borsh => borsh_::PeerMessage::from(self).try_to_vec().unwrap(), - Encoding::Proto => proto::PeerMessage::from(self).write_to_bytes().unwrap(), + Encoding::Proto => { + let mut msg = proto::PeerMessage::from(self); + let propagator = NodePropagator::new(); + let cx = Span::current().context(); + let mut headers = HashMap::new(); + propagator.inject_context(&cx, &mut headers); + tracing::warn!("headers: {:#?}", headers); + if !headers.is_empty() { + msg.trace_context = serialize_headers(&headers); + } + msg.write_to_bytes().unwrap() + } } } @@ -303,13 +536,64 @@ impl PeerMessage { .map_err(ParsePeerMessageError::BorshDecode)?) 
.try_into() .map_err(ParsePeerMessageError::BorshConv)?, - Encoding::Proto => (&proto::PeerMessage::parse_from_bytes(data) - .map_err(ParsePeerMessageError::ProtoDecode)?) - .try_into() - .map_err(ParsePeerMessageError::ProtoConv)?, + Encoding::Proto => { + let res1: proto::PeerMessage = proto::PeerMessage::parse_from_bytes(data) + .map_err(ParsePeerMessageError::ProtoDecode)?; + tracing::warn!("res1: {:?}", res1); + let res2: PeerMessage = (&res1).try_into().map_err(|err| { + tracing::warn!("try_into failed: {:#?}", err); + ParsePeerMessageError::ProtoConv(err) + })?; + tracing::warn!("res2: {:#?}", res2); + res2 + } }) } + pub(crate) fn deserialize_with_remote_context( + enc: Encoding, + data: &[u8], + ) -> Result<(PeerMessage, Option), ParsePeerMessageError> { + let _span = + tracing::trace_span!(target: "network", "deserialize_with_remote_context").entered(); + match enc { + Encoding::Borsh => { + let res = (&borsh_::PeerMessage::try_from_slice(data) + .map_err(ParsePeerMessageError::BorshDecode)?) 
+ .try_into() + .map_err(ParsePeerMessageError::BorshConv)?; + Ok((res, None)) + } + Encoding::Proto => { + let mut context_guard = None; + let res1: proto::PeerMessage = proto::PeerMessage::parse_from_bytes(data) + .map_err(ParsePeerMessageError::ProtoDecode)?; + tracing::warn!("res1: {:#?}", res1); + if !res1.trace_context.is_empty() { + let propagator = NodePropagator::new(); + let headers = deserialize_headers(&res1.trace_context); + tracing::warn!("deserialize headers: {:#?}", headers); + if let Ok(headers) = headers { + let cx = Span::current().context(); + let cx = propagator.extract_with_context(&cx, &headers); + tracing::warn!("cx: {:#?}", cx); + tracing::warn!("attaching context"); + context_guard = Some(cx.attach()); + tracing::warn!("attached context"); + } else { + tracing::warn!("Failed to parse headers: {:#?}", headers); + } + } + let res2: PeerMessage = (&res1).try_into().map_err(|err| { + tracing::warn!("try_into failed: {:#?}", err); + ParsePeerMessageError::ProtoConv(err) + })?; + tracing::warn!("res2: {:#?}", res2); + Ok((res2, context_guard)) + } + } + } + pub(crate) fn msg_variant(&self) -> &'static str { match self { PeerMessage::Routed(routed_msg) => routed_msg.body_variant(), diff --git a/chain/network/src/network_protocol/network.proto b/chain/network/src/network_protocol/network.proto index fcb6f7334cd..571b52f39c2 100644 --- a/chain/network/src/network_protocol/network.proto +++ b/chain/network/src/network_protocol/network.proto @@ -356,6 +356,8 @@ message PeerMessage { // Deprecated fields. 
reserved 24; + bytes trace_context = 26; + oneof message_type { Handshake handshake = 4; HandshakeFailure handshake_failure = 5; diff --git a/chain/network/src/peer/peer_actor.rs b/chain/network/src/peer/peer_actor.rs index b97e1c753af..f5b87a2f5ac 100644 --- a/chain/network/src/peer/peer_actor.rs +++ b/chain/network/src/peer/peer_actor.rs @@ -37,13 +37,14 @@ use near_primitives::utils::DisplayOption; use near_primitives::version::{ ProtocolVersion, PEER_MIN_ALLOWED_PROTOCOL_VERSION, PROTOCOL_VERSION, }; +use opentelemetry::ContextGuard; use parking_lot::Mutex; use std::fmt::Debug; use std::io; use std::net::SocketAddr; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; -use tracing::{debug, error, info, warn}; +use tracing::{debug, error, info, warn, Instrument}; /// Maximum number of messages per minute from single peer. // TODO(#5453): current limit is way to high due to us sending lots of messages during sync. @@ -273,6 +274,24 @@ impl PeerActor { return PeerMessage::deserialize(Encoding::Borsh, msg); } + fn parse_message_with_remote_context( + &mut self, + msg: &[u8], + ) -> Result<(PeerMessage, Option), ParsePeerMessageError> { + let _span = + tracing::trace_span!(target: "network", "parse_message_with_remote_context").entered(); + if let Some(e) = self.encoding() { + return PeerMessage::deserialize_with_remote_context(e, msg); + } + if let Ok((msg, guard)) = PeerMessage::deserialize_with_remote_context(Encoding::Proto, msg) + { + tracing::warn!("deserialized, has_guard: {}", guard.is_some()); + self.protocol_buffers_supported = true; + return Ok((msg, guard)); + } + return PeerMessage::deserialize_with_remote_context(Encoding::Borsh, msg); + } + fn send_message_or_log(&self, msg: &PeerMessage) { self.send_message(msg); } @@ -528,7 +547,7 @@ impl PeerActor { let conn = conn.clone(); wrap_future(async move { loop { - interval.tick().await; + interval.tick().in_current_span().await; let sent = tracker.lock().sent_bytes.minute_stats(&clock); let 
received = tracker.lock().received_bytes.minute_stats(&clock); conn.stats @@ -700,52 +719,68 @@ impl PeerActor { msg_hash: CryptoHash, body: RoutedMessageBody, ) -> Result, ReasonForBan> { + let _span = tracing::warn_span!(target: "network", "receive_routed_message").entered(); Ok(match body { RoutedMessageBody::TxStatusRequest(account_id, tx_hash) => network_state .client .tx_status_request(account_id, tx_hash) + .in_current_span() .await? .map(RoutedMessageBody::TxStatusResponse), RoutedMessageBody::TxStatusResponse(tx_result) => { - network_state.client.tx_status_response(tx_result).await?; + network_state.client.tx_status_response(tx_result).in_current_span().await?; None } RoutedMessageBody::StateRequestHeader(shard_id, sync_hash) => network_state .client .state_request_header(shard_id, sync_hash) + .in_current_span() .await? .map(RoutedMessageBody::VersionedStateResponse), RoutedMessageBody::StateRequestPart(shard_id, sync_hash, part_id) => network_state .client .state_request_part(shard_id, sync_hash, part_id) + .in_current_span() .await? 
.map(RoutedMessageBody::VersionedStateResponse), RoutedMessageBody::VersionedStateResponse(info) => { - network_state.client.state_response(info).await?; + network_state.client.state_response(info).in_current_span().await?; None } RoutedMessageBody::BlockApproval(approval) => { - network_state.client.block_approval(approval, peer_id).await?; + network_state.client.block_approval(approval, peer_id).in_current_span().await?; None } RoutedMessageBody::ForwardTx(transaction) => { - network_state.client.transaction(transaction, /*is_forwarded=*/ true).await?; + network_state + .client + .transaction(transaction, /*is_forwarded=*/ true) + .in_current_span() + .await?; None } RoutedMessageBody::PartialEncodedChunkRequest(request) => { - network_state.client.partial_encoded_chunk_request(request, msg_hash).await?; + network_state + .client + .partial_encoded_chunk_request(request, msg_hash) + .in_current_span() + .await?; None } RoutedMessageBody::PartialEncodedChunkResponse(response) => { - network_state.client.partial_encoded_chunk_response(response, clock.now()).await?; + network_state + .client + .partial_encoded_chunk_response(response, clock.now()) + .in_current_span() + .await?; None } RoutedMessageBody::VersionedPartialEncodedChunk(chunk) => { - network_state.client.partial_encoded_chunk(chunk).await?; + network_state.client.partial_encoded_chunk(chunk).in_current_span().await?; None } RoutedMessageBody::PartialEncodedChunkForward(msg) => { - network_state.client.partial_encoded_chunk_forward(msg).await?; + network_state.client.partial_encoded_chunk_forward(msg).in_current_span().await?; None } RoutedMessageBody::ReceiptOutcomeRequest(_) => { @@ -768,6 +803,8 @@ impl PeerActor { conn: &connection::Connection, msg: PeerMessage, ) { + let span = tracing::trace_span!( target: "network", "receive_message"); + let span_guard = span.enter(); // This is a fancy way to clone the message iff event_sink is non-null. 
// If you have a better idea on how to achieve that, feel free to improve this. let message_processed_event = self @@ -788,11 +825,16 @@ impl PeerActor { let clock = self.clock.clone(); let network_state = self.network_state.clone(); let peer_id = conn.peer_info.id.clone(); + drop(span_guard); ctx.spawn(wrap_future(async move { + let _span = tracing::warn_span!(target: "network", "receive_routed_message_span").entered(); + tracing::warn!("receive_routed_message !1"); Ok(match msg { PeerMessage::Routed(msg) => { let msg_hash = msg.hash(); - Self::receive_routed_message(&clock, &network_state, peer_id, msg_hash, msg.msg.body).await?.map( + Self::receive_routed_message(&clock, &network_state, peer_id, msg_hash, msg.msg.body) + .in_current_span() + .await?.map( |body| { PeerMessage::Routed(network_state.sign_message( &clock, @@ -802,33 +844,46 @@ impl PeerActor { ) } PeerMessage::BlockRequest(hash) => { - network_state.client.block_request(hash).await?.map(PeerMessage::Block) + network_state.client.block_request(hash) + .in_current_span() + .await?.map(PeerMessage::Block) } PeerMessage::BlockHeadersRequest(hashes) => { - network_state.client.block_headers_request(hashes).await?.map(PeerMessage::BlockHeaders) + network_state.client.block_headers_request(hashes) + .in_current_span() + .await?.map(PeerMessage::BlockHeaders) } PeerMessage::Block(block) => { - network_state.client.block(block, peer_id, was_requested).await?; + network_state.client.block(block, peer_id, was_requested) + .in_current_span() + .await?; None } PeerMessage::Transaction(transaction) => { - network_state.client.transaction(transaction, /*is_forwarded=*/ false).await?; + network_state.client.transaction(transaction, /*is_forwarded=*/ false) + .in_current_span() + .await?; None } PeerMessage::BlockHeaders(headers) => { - network_state.client.block_headers(headers, peer_id).await?; + network_state.client.block_headers(headers, peer_id) + .in_current_span() + .await?; None } 
PeerMessage::Challenge(challenge) => { - network_state.client.challenge(challenge).await?; + network_state.client.challenge(challenge) + .in_current_span() + .await?; None } msg => { tracing::error!(target: "network", "Peer received unexpected type: {:?}", msg); None } - })}) + })}.in_current_span()) .map(|res, act: &mut PeerActor, ctx| { + tracing::warn!("receive_routed_message !2"); match res { // TODO(gprusak): make sure that for routed messages we drop routeback info correctly. Ok(Some(resp)) => act.send_message_or_log(&resp), @@ -846,6 +901,11 @@ impl PeerActor { conn: &connection::Connection, peer_msg: PeerMessage, ) { + let _span = tracing::trace_span!( + target: "network", + "handle_msg_ready") + .entered(); + match peer_msg.clone() { PeerMessage::Disconnect => { debug!(target: "network", "Disconnect signal. Me: {:?} Peer: {:?}", self.my_node_info.id, self.other_peer_id()); @@ -944,8 +1004,12 @@ impl PeerActor { return None; } // Verify and add the new data to the internal state. - let (new_data, err) = - pms.accounts_data.clone().insert(msg.accounts_data).await; + let (new_data, err) = pms + .accounts_data + .clone() + .insert(msg.accounts_data) + .in_current_span() + .await; // Broadcast any new data we have found, even in presence of an error. // This will prevent a malicious peer from forcing us to re-verify valid // datasets. See accounts_data::Cache documentation for details. 
@@ -959,7 +1023,7 @@ impl PeerActor { .filter(|p| peer_id != p.peer_info.id) .map(|p| p.send_accounts_data(new_data.clone())) .collect(); - futures_util::future::join_all(handles).await; + futures_util::future::join_all(handles).in_current_span().await; } err.map(|err| match err { accounts_data::Error::InvalidSignature => { @@ -1069,15 +1133,16 @@ impl PeerActor { self.network_state .validate_edges_and_add_to_routing_table(conn.peer_info.id.clone(), edges); ctx.spawn( - wrap_future(async move { network_state.client.announce_account(accounts).await }).then( - move |res, act: &mut PeerActor, ctx| { - match res { - Err(ban_reason) => act.stop(ctx, ClosingReason::Ban(ban_reason)), - Ok(accounts) => act.network_state.broadcast_accounts(accounts), - } - wrap_future(async {}) - }, - ), + wrap_future(async move { + network_state.client.announce_account(accounts).in_current_span().await + }) + .then(move |res, act: &mut PeerActor, ctx| { + match res { + Err(ban_reason) => act.stop(ctx, ClosingReason::Ban(ban_reason)), + Ok(accounts) => act.network_state.broadcast_accounts(accounts), + } + wrap_future(async {}) + }), ); } } @@ -1196,7 +1261,12 @@ impl actix::Handler for PeerActor { type Result = (); #[perf] fn handle(&mut self, stream::Frame(msg): stream::Frame, ctx: &mut Self::Context) { - let _span = tracing::trace_span!(target: "network", "handle", handler = "bytes").entered(); + let _span = tracing::trace_span!( + target: "network", + "handle", + handler = "bytes", + actor = "PeerActor") + .entered(); // TODO(#5155) We should change our code to track size of messages received from Peer // as long as it travels to PeerManager, etc. 
@@ -1206,13 +1276,22 @@ impl actix::Handler for PeerActor { } self.update_stats_on_receiving_message(msg.len()); - let mut peer_msg = match self.parse_message(&msg) { - Ok(msg) => msg, + let (mut peer_msg, guard) = match self.parse_message_with_remote_context(&msg) { + Ok((msg, guard)) => (msg, guard), Err(err) => { debug!(target: "network", "Received invalid data {} from {}: {}", pretty::AbbrBytes(&msg), self.peer_info, err); return; } }; + tracing::warn!("parsed, has_guard: {}", guard.is_some()); + + let _span2 = tracing::trace_span!( + target: "network", + "handle-with-guard", + handler = "bytes", + has_guard = (guard.is_some()), + actor = "PeerActor") + .entered(); match &peer_msg { PeerMessage::Routed(msg) => { From c1aaf21b99d80cdff655dc339f8069d904b15fbb Mon Sep 17 00:00:00 2001 From: mzhangmzz <34969888+mzhangmzz@users.noreply.github.com> Date: Thu, 20 Oct 2022 11:00:36 -0400 Subject: [PATCH 002/103] move the check for is_height_processed forward (#7855) Move the check for is_height_processed before process_block_header. Previously, this check happens after, which means, the node will re-process the block header (which takes a few ms) and re-broadcast an invalid block before drops it. 
In the case when there are many invalid blocks circulating in the network, this can cause the node to be too busy. --- chain/chain/src/chain.rs | 5 +++++ chain/client/src/client.rs | 18 ------------------ chain/client/src/client_actor.rs | 17 +++++++++++++++++ .../src/tests/client/process_blocks.rs | 3 +++ 4 files changed, 25 insertions(+), 18 deletions(-) diff --git a/chain/chain/src/chain.rs b/chain/chain/src/chain.rs index c2b888ee833..df404b62b51 100644 --- a/chain/chain/src/chain.rs +++ b/chain/chain/src/chain.rs @@ -4288,6 +4288,11 @@ impl Chain { self.blocks_in_processing.contains(hash) } + #[inline] + pub fn is_height_processed(&self, height: BlockHeight) -> Result { + self.store.is_height_processed(height) + } + /// Check if can sync with sync_hash pub fn check_sync_hash_validity(&self, sync_hash: &CryptoHash) -> Result { let head = self.head()?; diff --git a/chain/client/src/client.rs b/chain/client/src/client.rs index 62d0049e121..dc9c159f85d 100644 --- a/chain/client/src/client.rs +++ b/chain/client/src/client.rs @@ -817,24 +817,6 @@ impl Client { provenance: Provenance, apply_chunks_done_callback: DoneApplyChunkCallback, ) -> Result<(), near_chain::Error> { - let is_requested = match provenance { - Provenance::PRODUCED | Provenance::SYNC => true, - Provenance::NONE => false, - }; - // Drop the block if a) it is not requested, b) we already processed this height, - // c) it is not building on top of current head - if !is_requested - && block.header().prev_hash() - != &self - .chain - .head() - .map_or_else(|_| CryptoHash::default(), |tip| tip.last_block_hash) - { - if self.chain.store().is_height_processed(block.header().height())? 
{ - return Ok(()); - } - } - let mut block_processing_artifacts = BlockProcessingArtifact::default(); let result = { diff --git a/chain/client/src/client_actor.rs b/chain/client/src/client_actor.rs index 6f915d31d40..424f7d02287 100644 --- a/chain/client/src/client_actor.rs +++ b/chain/client/src/client_actor.rs @@ -1419,6 +1419,23 @@ impl ClientActor { debug!(target: "client", tail_height = tail, "Dropping a block that is too far behind."); return; } + // drop the block if a) it is not requested, b) we already processed this height, c) it is not building on top of current head + // Note that this check must happen before process_block where we try to validate block + // header and rebroadcast blocks, otherwise blocks that failed processing could be + // processed and rebroadcasted again and again. + if !was_requested + && block.header().prev_hash() + != &self + .client + .chain + .head() + .map_or_else(|_| CryptoHash::default(), |tip| tip.last_block_hash) + { + if self.client.chain.is_height_processed(block.header().height()).unwrap_or_default() { + debug!(target: "client", height = block.header().height(), "Dropping a block because we've seen this height before and we didn't request it"); + return; + } + } let prev_hash = *block.header().prev_hash(); let provenance = if was_requested { near_chain::Provenance::SYNC } else { near_chain::Provenance::NONE }; diff --git a/integration-tests/src/tests/client/process_blocks.rs b/integration-tests/src/tests/client/process_blocks.rs index 81022391eb9..69492fe85cc 100644 --- a/integration-tests/src/tests/client/process_blocks.rs +++ b/integration-tests/src/tests/client/process_blocks.rs @@ -2129,6 +2129,9 @@ fn test_sync_hash_validity() { } /// Only process one block per height +/// Temporarily disable this test because the is_height_processed check is moved to client actor +/// TODO (Min): refactor client actor receive_block code to move it to client +#[ignore] #[test] fn test_not_process_height_twice() { let mut env = 
TestEnv::builder(ChainGenesis::test()).build(); From 6e685fdeeb79b2d870547284616128d9a55323f1 Mon Sep 17 00:00:00 2001 From: mm-near <91919554+mm-near@users.noreply.github.com> Date: Thu, 20 Oct 2022 17:37:09 +0200 Subject: [PATCH 003/103] Added peer store information to network page (#7761) Display a list of peers stored in peer store - together with information on when we attempted to connect to them. You can see it working in: http://34.147.53.32:3030/debug/pages/network_info This is at the bottom of the page - and you have to click the button to fetch this info (as this is often over 10k peers - and loading takes a while). --- chain/jsonrpc-primitives/src/types/status.rs | 20 +++++- chain/jsonrpc/jsonrpc-tests/src/lib.rs | 1 + chain/jsonrpc/res/network_info.html | 72 +++++++++++++++++++ chain/jsonrpc/src/api/status.rs | 37 +++++++++- chain/jsonrpc/src/lib.rs | 60 ++++++++++++---- chain/network/src/debug.rs | 16 +++++ chain/network/src/lib.rs | 1 + .../src/peer_manager/peer_manager_actor.rs | 37 +++++++++- core/primitives/src/views.rs | 18 +++++ nearcore/src/lib.rs | 1 + tools/mock-node/src/setup.rs | 1 + 11 files changed, 245 insertions(+), 19 deletions(-) create mode 100644 chain/network/src/debug.rs diff --git a/chain/jsonrpc-primitives/src/types/status.rs b/chain/jsonrpc-primitives/src/types/status.rs index 730798dcf94..7e335ecb98f 100644 --- a/chain/jsonrpc-primitives/src/types/status.rs +++ b/chain/jsonrpc-primitives/src/types/status.rs @@ -1,3 +1,7 @@ +use near_client_primitives::debug::{ + DebugBlockStatus, EpochInfoView, TrackedShardsView, ValidatorStatus, +}; +use near_primitives::views::{CatchupStatusView, PeerStoreView, SyncStatusView}; use serde::{Deserialize, Serialize}; #[derive(Debug, Serialize, Deserialize)] @@ -6,10 +10,24 @@ pub struct RpcStatusResponse { pub status_response: near_primitives::views::StatusResponse, } +#[derive(Serialize, Debug)] +pub enum DebugStatusResponse { + SyncStatus(SyncStatusView), + CatchupStatus(Vec), + 
TrackedShards(TrackedShardsView), + // List of epochs - in descending order (next epoch is first). + EpochInfo(Vec), + // Detailed information about blocks. + BlockStatus(Vec), + // Detailed information about the validator (approvals, block & chunk production etc.) + ValidatorStatus(ValidatorStatus), + PeerStore(PeerStoreView), +} + #[cfg(feature = "debug_types")] #[derive(Debug, Serialize)] pub struct RpcDebugStatusResponse { - pub status_response: near_client_primitives::debug::DebugStatusResponse, + pub status_response: DebugStatusResponse, } #[derive(Debug, Serialize, Deserialize)] diff --git a/chain/jsonrpc/jsonrpc-tests/src/lib.rs b/chain/jsonrpc/jsonrpc-tests/src/lib.rs index 0ea6aa47dbd..dd4e392ecb3 100644 --- a/chain/jsonrpc/jsonrpc-tests/src/lib.rs +++ b/chain/jsonrpc/jsonrpc-tests/src/lib.rs @@ -45,6 +45,7 @@ pub fn start_all_with_validity_period_and_no_epoch_sync( TEST_GENESIS_CONFIG.clone(), client_addr, view_client_addr.clone(), + None, ); (view_client_addr, addr) } diff --git a/chain/jsonrpc/res/network_info.html b/chain/jsonrpc/res/network_info.html index af6e3d3aa4c..7721ff7aed5 100644 --- a/chain/jsonrpc/res/network_info.html +++ b/chain/jsonrpc/res/network_info.html @@ -155,6 +155,7 @@ } $(document).ready(() => { + $('.detailed-peer-storage-div').hide(); $('span').text("Loading..."); $.ajax({ type: "GET", @@ -274,6 +275,57 @@ }) }); + + function to_human_time(seconds) { + let result = ""; + if (seconds >= 60) { + let minutes = Math.floor(seconds / 60); + seconds = seconds % 60; + if (minutes > 60) { + let hours = Math.floor(minutes / 60); + minutes = minutes % 60; + if (hours > 24) { + let days = Math.floor(hours / 24); + hours = hours % 24; + result += days + " days "; + } + result += hours + " h "; + } + result += minutes + " m "; + } + result += seconds + " s" + return result; + } + + function show_peer_storage() { + $(".detailed-peer-storage-button").text("Loading..."); + $(".tbody-detailed-peer-storage").html(""); + $.ajax({ + type: "GET", + 
url: "/debug/api/peer_store", + success: data => { + $(".detailed-peer-storage-size").text(data.status_response.PeerStore.peer_states.length); + data.status_response.PeerStore.peer_states.forEach(element => { + let row = $(""); + row.append($("").append(element['peer_id'])); + row.append($("").append(element['addr'])); + + let first_seen = + row.append($("").append(to_human_time(Math.floor(Date.now() / 1000) - element['first_seen']))); + row.append($("").append(to_human_time(Math.floor(Date.now() / 1000) - element['last_seen']))); + if (element['last_attempt'] != null) { + row.append($("").append(to_human_time(Math.floor(Date.now() / 1000) - element['last_attempt']))); + } else { + row.append($("")); + } + row.append($("").append(element['status'])); + $(".tbody-detailed-peer-storage").append(row); + }); + $(".detailed-peer-storage-div").show(); + $(".detailed-peer-storage-button").text("Refresh"); + } + }); + } @@ -349,6 +401,26 @@

+
+ +
+

Peers in storage:

+ + + + + + + + + + + + +
Peer idPeer addressFirst seenLast seenLast connection attemptStatus
+
\ No newline at end of file diff --git a/chain/jsonrpc/src/api/status.rs b/chain/jsonrpc/src/api/status.rs index 2354145674e..6a81124a7c3 100644 --- a/chain/jsonrpc/src/api/status.rs +++ b/chain/jsonrpc/src/api/status.rs @@ -19,10 +19,41 @@ impl RpcFrom for RpcStatusResponse { } impl RpcFrom - for near_jsonrpc_primitives::types::status::RpcDebugStatusResponse + for near_jsonrpc_primitives::types::status::DebugStatusResponse { - fn rpc_from(status_response: near_client_primitives::debug::DebugStatusResponse) -> Self { - Self { status_response } + fn rpc_from(response: near_client_primitives::debug::DebugStatusResponse) -> Self { + match response { + near_client_primitives::debug::DebugStatusResponse::SyncStatus(x) => { + near_jsonrpc_primitives::types::status::DebugStatusResponse::SyncStatus(x) + } + near_client_primitives::debug::DebugStatusResponse::CatchupStatus(x) => { + near_jsonrpc_primitives::types::status::DebugStatusResponse::CatchupStatus(x) + } + near_client_primitives::debug::DebugStatusResponse::TrackedShards(x) => { + near_jsonrpc_primitives::types::status::DebugStatusResponse::TrackedShards(x) + } + near_client_primitives::debug::DebugStatusResponse::EpochInfo(x) => { + near_jsonrpc_primitives::types::status::DebugStatusResponse::EpochInfo(x) + } + near_client_primitives::debug::DebugStatusResponse::BlockStatus(x) => { + near_jsonrpc_primitives::types::status::DebugStatusResponse::BlockStatus(x) + } + near_client_primitives::debug::DebugStatusResponse::ValidatorStatus(x) => { + near_jsonrpc_primitives::types::status::DebugStatusResponse::ValidatorStatus(x) + } + } + } +} + +impl RpcFrom + for near_jsonrpc_primitives::types::status::DebugStatusResponse +{ + fn rpc_from(response: near_network::debug::DebugStatus) -> Self { + match response { + near_network::debug::DebugStatus::PeerStore(x) => { + near_jsonrpc_primitives::types::status::DebugStatusResponse::PeerStore(x) + } + } } } diff --git a/chain/jsonrpc/src/lib.rs b/chain/jsonrpc/src/lib.rs index 
0682e670826..edad2dbed03 100644 --- a/chain/jsonrpc/src/lib.rs +++ b/chain/jsonrpc/src/lib.rs @@ -3,13 +3,14 @@ use std::path::PathBuf; use std::time::{Duration, Instant}; -use actix::Addr; +use actix::{Addr, MailboxError}; use actix_cors::Cors; use actix_web::http::header; use actix_web::HttpRequest; use actix_web::{get, http, middleware, web, App, Error as HttpError, HttpResponse, HttpServer}; use futures::Future; use futures::FutureExt; +use near_network::PeerManagerActor; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use tokio::time::{sleep, timeout}; @@ -220,6 +221,7 @@ fn process_query_response( struct JsonRpcHandler { client_addr: Addr, view_client_addr: Addr, + peer_manager_addr: Option>, polling_config: RpcPollingConfig, genesis_config: GenesisConfig, enable_debug_rpc: bool, @@ -419,6 +421,19 @@ impl JsonRpcHandler { .map_err(RpcFrom::rpc_from) } + async fn peer_manager_send(&self, msg: M) -> Result + where + PeerManagerActor: actix::Handler, + M: actix::Message + Send + 'static, + M::Result: Send, + E: RpcFrom, + { + match &self.peer_manager_addr { + Some(peer_manager_addr) => peer_manager_addr.send(msg).await.map_err(RpcFrom::rpc_from), + None => Err(RpcFrom::rpc_from(MailboxError::Closed)), + } + } + async fn send_tx_async( &self, request_data: near_jsonrpc_primitives::types::transactions::RpcBroadcastTransactionRequest, @@ -758,18 +773,35 @@ impl JsonRpcHandler { near_jsonrpc_primitives::types::status::RpcStatusError, > { if self.enable_debug_rpc { - let debug_status = match path { - "/debug/api/tracked_shards" => self.client_send(DebugStatus::TrackedShards).await?, - "/debug/api/sync_status" => self.client_send(DebugStatus::SyncStatus).await?, - "/debug/api/catchup_status" => self.client_send(DebugStatus::CatchupStatus).await?, - "/debug/api/epoch_info" => self.client_send(DebugStatus::EpochInfo).await?, - "/debug/api/block_status" => self.client_send(DebugStatus::BlockStatus).await?, - "/debug/api/validator_status" => { - 
self.client_send(DebugStatus::ValidatorStatus).await? - } - _ => return Ok(None), - }; - return Ok(Some(debug_status.rpc_into())); + let debug_status: near_jsonrpc_primitives::types::status::DebugStatusResponse = + match path { + "/debug/api/tracked_shards" => { + self.client_send(DebugStatus::TrackedShards).await?.rpc_into() + } + "/debug/api/sync_status" => { + self.client_send(DebugStatus::SyncStatus).await?.rpc_into() + } + "/debug/api/catchup_status" => { + self.client_send(DebugStatus::CatchupStatus).await?.rpc_into() + } + "/debug/api/epoch_info" => { + self.client_send(DebugStatus::EpochInfo).await?.rpc_into() + } + "/debug/api/block_status" => { + self.client_send(DebugStatus::BlockStatus).await?.rpc_into() + } + "/debug/api/validator_status" => { + self.client_send(DebugStatus::ValidatorStatus).await?.rpc_into() + } + "/debug/api/peer_store" => self + .peer_manager_send(near_network::debug::GetDebugStatus::PeerStore) + .await? + .rpc_into(), + _ => return Ok(None), + }; + return Ok(Some(near_jsonrpc_primitives::types::status::RpcDebugStatusResponse { + status_response: debug_status, + })); } else { return Ok(None); } @@ -1404,6 +1436,7 @@ pub fn start_http( genesis_config: GenesisConfig, client_addr: Addr, view_client_addr: Addr, + peer_manager_addr: Option>, ) -> Vec<(&'static str, actix_web::dev::ServerHandle)> { let RpcConfig { addr, @@ -1424,6 +1457,7 @@ pub fn start_http( .app_data(web::Data::new(JsonRpcHandler { client_addr: client_addr.clone(), view_client_addr: view_client_addr.clone(), + peer_manager_addr: peer_manager_addr.clone(), polling_config, genesis_config: genesis_config.clone(), enable_debug_rpc, diff --git a/chain/network/src/debug.rs b/chain/network/src/debug.rs new file mode 100644 index 00000000000..7bdaf176312 --- /dev/null +++ b/chain/network/src/debug.rs @@ -0,0 +1,16 @@ +use ::actix::Message; +use near_primitives::views::PeerStoreView; + +// Different debug requests that can be sent by HTML pages, via GET. 
+pub enum GetDebugStatus { + PeerStore, +} + +#[derive(actix::MessageResponse, Debug)] +pub enum DebugStatus { + PeerStore(PeerStoreView), +} + +impl Message for GetDebugStatus { + type Result = DebugStatus; +} diff --git a/chain/network/src/lib.rs b/chain/network/src/lib.rs index 2c51e440a43..5e1df024cc3 100644 --- a/chain/network/src/lib.rs +++ b/chain/network/src/lib.rs @@ -15,6 +15,7 @@ pub mod blacklist; pub mod client; pub mod config; pub mod config_json; +pub mod debug; pub mod routing; pub mod tcp; pub mod test_utils; diff --git a/chain/network/src/peer_manager/peer_manager_actor.rs b/chain/network/src/peer_manager/peer_manager_actor.rs index e4f97284dba..2cb7c061168 100644 --- a/chain/network/src/peer_manager/peer_manager_actor.rs +++ b/chain/network/src/peer_manager/peer_manager_actor.rs @@ -1,5 +1,6 @@ use crate::client; use crate::config; +use crate::debug::{DebugStatus, GetDebugStatus}; use crate::network_protocol::{ AccountData, AccountOrPeerIdOrHash, Edge, EdgeState, PartialEdgeInfo, PeerInfo, PeerMessage, Ping, Pong, RawRoutedMessage, RoutedMessageBody, RoutingTableUpdate, StateResponseInfo, @@ -36,6 +37,7 @@ use near_performance_metrics_macros::perf; use near_primitives::block::GenesisId; use near_primitives::network::{AnnounceAccount, PeerId}; use near_primitives::types::AccountId; +use near_primitives::views::{KnownPeerStateView, PeerStoreView}; use parking_lot::RwLock; use rand::seq::IteratorRandom; use rand::thread_rng; @@ -143,6 +145,9 @@ pub struct PeerManagerActor { whitelist_nodes: Vec, pub(crate) state: Arc, + + /// Last time when we tried to establish connection to this peer. 
+ last_peer_outbound_attempt: HashMap, } /// TEST-ONLY @@ -299,6 +304,7 @@ impl PeerManagerActor { started_connect_attempts: false, local_peer_pending_update_nonce_request: HashMap::new(), whitelist_nodes, + last_peer_outbound_attempt: Default::default(), state: Arc::new(NetworkState::new( config.clone(), genesis_id, @@ -878,7 +884,7 @@ impl PeerManagerActor { self.started_connect_attempts = true; interval = default_interval; } - + self.last_peer_outbound_attempt.insert(peer_info.id.clone(), self.clock.now_utc()); ctx.spawn(wrap_future({ let state = self.state.clone(); let clock = self.clock.clone(); @@ -1324,7 +1330,6 @@ impl PeerManagerActor { self.handle_msg_network_requests(msg, ctx), ) } - // TEST-ONLY PeerManagerMessageRequest::OutboundTcpConnect(stream) => { let peer_addr = stream.peer_addr; if let Err(err) = @@ -1581,3 +1586,31 @@ impl Handler> for PeerManagerActor { self.handle_peer_manager_message(msg, ctx) } } + +impl Handler for PeerManagerActor { + type Result = DebugStatus; + fn handle(&mut self, msg: GetDebugStatus, _ctx: &mut Context) -> Self::Result { + match msg { + GetDebugStatus::PeerStore => { + let mut peer_states_view = self + .peer_store + .iter() + .map(|(peer_id, known_peer_state)| KnownPeerStateView { + peer_id: peer_id.clone(), + status: format!("{:?}", known_peer_state.status), + addr: format!("{:?}", known_peer_state.peer_info.addr), + first_seen: known_peer_state.first_seen.unix_timestamp(), + last_seen: known_peer_state.last_seen.unix_timestamp(), + last_attempt: self + .last_peer_outbound_attempt + .get(peer_id) + .map(|it| it.unix_timestamp()), + }) + .collect::>(); + + peer_states_view.sort_by_key(|a| (-a.last_attempt.unwrap_or(0), -a.last_seen)); + DebugStatus::PeerStore(PeerStoreView { peer_states: peer_states_view }) + } + } + } +} diff --git a/core/primitives/src/views.rs b/core/primitives/src/views.rs index 78ad904651f..07e3eb0767a 100644 --- a/core/primitives/src/views.rs +++ b/core/primitives/src/views.rs @@ -26,6 +26,7 @@ 
use crate::errors::TxExecutionError; use crate::hash::{hash, CryptoHash}; use crate::logging; use crate::merkle::{combine_hash, MerklePath}; +use crate::network::PeerId; use crate::profile::Cost; use crate::receipt::{ActionReceipt, DataReceipt, DataReceiver, Receipt, ReceiptEnum}; use crate::serialize::{base64_format, dec_format, option_base64_format}; @@ -244,6 +245,18 @@ impl FromIterator for AccessKeyList { } } +#[cfg_attr(feature = "deepsize_feature", derive(deepsize::DeepSizeOf))] +#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] +pub struct KnownPeerStateView { + pub peer_id: PeerId, + pub status: String, + pub addr: String, + pub first_seen: i64, + pub last_seen: i64, + pub last_attempt: Option, +} + +#[cfg_attr(feature = "deepsize_feature", derive(deepsize::DeepSizeOf))] #[derive(Debug, PartialEq, Eq, Clone)] pub enum QueryResponseKind { ViewAccount(AccountView), @@ -374,6 +387,11 @@ pub enum SyncStatusView { BodySync { start_height: BlockHeight, current_height: BlockHeight, highest_height: BlockHeight }, } +#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] +pub struct PeerStoreView { + pub peer_states: Vec, +} + #[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] pub struct ShardSyncDownloadView { pub downloads: Vec, diff --git a/nearcore/src/lib.rs b/nearcore/src/lib.rs index d87fe616547..47f3f748673 100644 --- a/nearcore/src/lib.rs +++ b/nearcore/src/lib.rs @@ -203,6 +203,7 @@ pub fn start_with_config_and_synchronization( config.genesis.config.clone(), client_actor.clone(), view_client.clone(), + Some(network_actor.clone()), )); } diff --git a/tools/mock-node/src/setup.rs b/tools/mock-node/src/setup.rs index 89c0240687c..5263ebf58ad 100644 --- a/tools/mock-node/src/setup.rs +++ b/tools/mock-node/src/setup.rs @@ -295,6 +295,7 @@ pub fn setup_mock_node( config.genesis.config, client.clone(), view_client.clone(), + None, ) }); From 34f847e9c35677b966cf2ac8995585e7174b6b23 Mon Sep 17 00:00:00 2001 From: mzhangmzz 
<34969888+mzhangmzz@users.noreply.github.com> Date: Thu, 20 Oct 2022 18:34:11 -0400 Subject: [PATCH 004/103] Do not use flat storage for storage_write (#7885) As an intermediate step, we will only enable flat storage for storage_read, but not storage_write. --- core/store/src/lib.rs | 6 +-- core/store/src/trie/mod.rs | 41 +++++++++++++++---- core/store/src/trie/update.rs | 10 +++-- .../client/features/chunk_nodes_cache.rs | 19 ++------- runtime/near-vm-logic/Cargo.toml | 2 + runtime/near-vm-logic/src/dependencies.rs | 19 +++++++-- runtime/near-vm-logic/src/lib.rs | 2 +- runtime/near-vm-logic/src/logic.rs | 15 +++++-- .../near-vm-logic/src/mocks/mock_external.rs | 4 +- .../src/tests/storage_read_write.rs | 4 +- .../near-vm-runner/src/tests/ts_contract.rs | 4 +- runtime/runtime/Cargo.toml | 2 +- runtime/runtime/src/ext.rs | 21 +++++++--- 13 files changed, 99 insertions(+), 50 deletions(-) diff --git a/core/store/src/lib.rs b/core/store/src/lib.rs index ba90c1a08f7..30f1bc2a7f9 100644 --- a/core/store/src/lib.rs +++ b/core/store/src/lib.rs @@ -31,9 +31,9 @@ use crate::db::{ pub use crate::trie::iterator::TrieIterator; pub use crate::trie::update::{TrieUpdate, TrieUpdateIterator, TrieUpdateValuePtr}; pub use crate::trie::{ - estimator, split_state, ApplyStatePartResult, KeyForStateChanges, NibbleSlice, PartialStorage, - PrefetchApi, RawTrieNode, RawTrieNodeWithSize, ShardTries, Trie, TrieAccess, TrieCache, - TrieCachingStorage, TrieChanges, TrieConfig, TrieStorage, WrappedTrieChanges, + estimator, split_state, ApplyStatePartResult, KeyForStateChanges, KeyLookupMode, NibbleSlice, + PartialStorage, PrefetchApi, RawTrieNode, RawTrieNodeWithSize, ShardTries, Trie, TrieAccess, + TrieCache, TrieCachingStorage, TrieChanges, TrieConfig, TrieStorage, WrappedTrieChanges, }; pub use flat_state::FlatStateDelta; diff --git a/core/store/src/trie/mod.rs b/core/store/src/trie/mod.rs index 362ed95df3c..c316de9f6fc 100644 --- a/core/store/src/trie/mod.rs +++ b/core/store/src/trie/mod.rs 
@@ -10,6 +10,7 @@ use near_primitives::contract::ContractCode; use near_primitives::hash::{hash, CryptoHash}; pub use near_primitives::shard_layout::ShardUId; use near_primitives::state::ValueRef; +#[cfg(feature = "protocol_feature_flat_state")] use near_primitives::state_record::is_delayed_receipt_key; use near_primitives::trie_key::TrieKey; use near_primitives::types::{StateRoot, StateRootNode}; @@ -60,6 +61,12 @@ pub struct TrieCosts { pub node_cost: u64, } +/// Whether a key lookup will be performed through flat storage or through iterating the trie +pub enum KeyLookupMode { + FlatStorage, + Trie, +} + const TRIE_COSTS: TrieCosts = TrieCosts { byte_of_key: 2, byte_of_value: 1, node_cost: 50 }; #[derive(Clone, Hash)] @@ -873,19 +880,37 @@ impl Trie { } } - pub fn get_ref(&self, key: &[u8]) -> Result, StorageError> { - let is_delayed = is_delayed_receipt_key(key); - match &self.flat_state { - Some(flat_state) if !is_delayed => flat_state.get_ref(&key), - _ => { - let key = NibbleSlice::new(key); - self.lookup(key) + /// Return the value reference to the `key` + /// `mode`: whether we will try to perform the lookup through flat storage or trie. + /// Note that even if `mode == KeyLookupMode::FlatStorage`, we still may not use + /// flat storage if the trie is not created with a flat storage object in it. + /// Such double check may seem redundant but it is necessary for now. + /// Not all tries are created with flat storage, for example, we don't + /// enable flat storage for state-viewer. And we do not use flat + /// storage for key lookup performed in `storage_write`, so we need + /// the `use_flat_storage` to differentiate whether the lookup is performed for + /// storage_write or not. 
+ #[allow(unused)] + pub fn get_ref( + &self, + key: &[u8], + mode: KeyLookupMode, + ) -> Result, StorageError> { + #[cfg(feature = "protocol_feature_flat_state")] + { + let is_delayed = is_delayed_receipt_key(key); + if matches!(mode, KeyLookupMode::FlatStorage) && !is_delayed { + if let Some(flat_state) = &self.flat_state { + return flat_state.get_ref(&key); + } } } + let key = NibbleSlice::new(key); + self.lookup(key) } pub fn get(&self, key: &[u8]) -> Result>, StorageError> { - match self.get_ref(key)? { + match self.get_ref(key, KeyLookupMode::FlatStorage)? { Some(ValueRef { hash, .. }) => { self.storage.retrieve_raw_bytes(&hash).map(|bytes| Some(bytes.to_vec())) } diff --git a/core/store/src/trie/update.rs b/core/store/src/trie/update.rs index c4fa7faab40..1b07c362c95 100644 --- a/core/store/src/trie/update.rs +++ b/core/store/src/trie/update.rs @@ -7,7 +7,7 @@ use near_primitives::types::{ }; pub use self::iterator::TrieUpdateIterator; -use crate::trie::TrieChanges; +use crate::trie::{KeyLookupMode, TrieChanges}; use crate::StorageError; use super::Trie; @@ -66,7 +66,11 @@ impl TrieUpdate { &self.trie } - pub fn get_ref(&self, key: &TrieKey) -> Result>, StorageError> { + pub fn get_ref( + &self, + key: &TrieKey, + mode: KeyLookupMode, + ) -> Result>, StorageError> { let key = key.to_vec(); if let Some(key_value) = self.prospective.get(&key) { return Ok(key_value.value.as_deref().map(TrieUpdateValuePtr::MemoryRef)); @@ -76,7 +80,7 @@ impl TrieUpdate { } } - self.trie.get_ref(&key).map(|option| { + self.trie.get_ref(&key, mode).map(|option| { option.map(|ValueRef { length, hash }| { TrieUpdateValuePtr::HashAndSize(&self.trie, length, hash) }) diff --git a/integration-tests/src/tests/client/features/chunk_nodes_cache.rs b/integration-tests/src/tests/client/features/chunk_nodes_cache.rs index a7ab21faa93..19a8998c75b 100644 --- a/integration-tests/src/tests/client/features/chunk_nodes_cache.rs +++ 
b/integration-tests/src/tests/client/features/chunk_nodes_cache.rs @@ -149,19 +149,8 @@ fn compare_node_counts() { }) .collect(); - if cfg!(feature = "protocol_feature_flat_state") { - // If flat storage is enabled, we shouldn't observe any trie node reads during transaction processing. - // For the first pair of write calls there are no contract values in storage, thus we see zero db reads. - assert_eq!(tx_node_counts[0], TrieNodesCount { db_reads: 0, mem_reads: 0 }); - // For all other write calls we read the value reference from flat storage and the value from state. - // The first read doesn't count, so we should observe only two DB reads each time. - (1..4).for_each(|i| { - assert_eq!(tx_node_counts[i], TrieNodesCount { db_reads: 2, mem_reads: 0 }) - }); - } else { - assert_eq!(tx_node_counts[0], TrieNodesCount { db_reads: 4, mem_reads: 0 }); - assert_eq!(tx_node_counts[1], TrieNodesCount { db_reads: 12, mem_reads: 0 }); - assert_eq!(tx_node_counts[2], TrieNodesCount { db_reads: 8, mem_reads: 4 }); - assert_eq!(tx_node_counts[3], TrieNodesCount { db_reads: 8, mem_reads: 4 }); - } + assert_eq!(tx_node_counts[0], TrieNodesCount { db_reads: 4, mem_reads: 0 }); + assert_eq!(tx_node_counts[1], TrieNodesCount { db_reads: 12, mem_reads: 0 }); + assert_eq!(tx_node_counts[2], TrieNodesCount { db_reads: 8, mem_reads: 4 }); + assert_eq!(tx_node_counts[3], TrieNodesCount { db_reads: 8, mem_reads: 4 }); } diff --git a/runtime/near-vm-logic/Cargo.toml b/runtime/near-vm-logic/Cargo.toml index c6c7a42660a..110668393f6 100644 --- a/runtime/near-vm-logic/Cargo.toml +++ b/runtime/near-vm-logic/Cargo.toml @@ -44,6 +44,8 @@ protocol_feature_fix_contract_loading_cost = [ protocol_feature_ed25519_verify = [ "near-primitives/protocol_feature_ed25519_verify" ] +protocol_feature_flat_state = [] + io_trace = ["tracing"] # Use this feature to enable counting of fees and costs applied. 
diff --git a/runtime/near-vm-logic/src/dependencies.rs b/runtime/near-vm-logic/src/dependencies.rs index f1bec041013..688aa0607d2 100644 --- a/runtime/near-vm-logic/src/dependencies.rs +++ b/runtime/near-vm-logic/src/dependencies.rs @@ -32,6 +32,12 @@ pub trait MemoryLike { fn write_memory(&mut self, offset: u64, buffer: &[u8]); } +/// This enum represents if a storage_get call will be performed through flat storage or trie +pub enum StorageGetMode { + FlatStorage, + Trie, +} + pub type Result = ::std::result::Result; /// Logical pointer to a value in storage. @@ -71,6 +77,7 @@ pub trait External { /// /// * `key` - the key to read /// + /// * `mode`- whether the lookup will be performed through flat storage or trie /// # Errors /// /// This function could return [`near_vm_errors::VMRunnerError::ExternalError`]. @@ -78,15 +85,19 @@ pub trait External { /// # Example /// ``` /// # use near_vm_logic::mocks::mock_external::MockedExternal; - /// # use near_vm_logic::{External, ValuePtr}; + /// # use near_vm_logic::{External, StorageGetMode, ValuePtr}; /// /// # let mut external = MockedExternal::new(); /// external.storage_set(b"key42", b"value1337").unwrap(); - /// assert_eq!(external.storage_get(b"key42").unwrap().map(|ptr| ptr.deref().unwrap()), Some(b"value1337".to_vec())); + /// assert_eq!(external.storage_get(b"key42", StorageGetMode::Trie).unwrap().map(|ptr| ptr.deref().unwrap()), Some(b"value1337".to_vec())); /// // Returns Ok(None) if there is no value for a key - /// assert_eq!(external.storage_get(b"no_key").unwrap().map(|ptr| ptr.deref().unwrap()), None); + /// assert_eq!(external.storage_get(b"no_key", StorageGetMode::Trie).unwrap().map(|ptr| ptr.deref().unwrap()), None); /// ``` - fn storage_get<'a>(&'a self, key: &[u8]) -> Result>>; + fn storage_get<'a>( + &'a self, + key: &[u8], + mode: StorageGetMode, + ) -> Result>>; /// Removes the `key` from the storage trie associated with the current account. 
/// diff --git a/runtime/near-vm-logic/src/lib.rs b/runtime/near-vm-logic/src/lib.rs index 68512c12432..4bc646e3cfc 100644 --- a/runtime/near-vm-logic/src/lib.rs +++ b/runtime/near-vm-logic/src/lib.rs @@ -14,7 +14,7 @@ pub mod types; mod utils; pub use context::VMContext; -pub use dependencies::{External, MemoryLike, ValuePtr}; +pub use dependencies::{External, MemoryLike, StorageGetMode, ValuePtr}; pub use logic::{VMLogic, VMOutcome}; pub use near_primitives_core::config::*; pub use near_primitives_core::profile; diff --git a/runtime/near-vm-logic/src/logic.rs b/runtime/near-vm-logic/src/logic.rs index 63aa450615b..2a1c44f96ce 100644 --- a/runtime/near-vm-logic/src/logic.rs +++ b/runtime/near-vm-logic/src/logic.rs @@ -4,7 +4,7 @@ use crate::gas_counter::{FastGasCounter, GasCounter}; use crate::receipt_manager::ReceiptManager; use crate::types::{PromiseIndex, PromiseResult, ReceiptIndex, ReturnData}; use crate::utils::split_method_names; -use crate::{ReceiptMetadata, ValuePtr}; +use crate::{ReceiptMetadata, StorageGetMode, ValuePtr}; use byteorder::ByteOrder; use near_crypto::Secp256K1Signature; use near_primitives::checked_feature; @@ -2395,7 +2395,9 @@ impl<'a> VMLogic<'a> { self.gas_counter.pay_per(storage_write_key_byte, key.len() as u64)?; self.gas_counter.pay_per(storage_write_value_byte, value.len() as u64)?; let nodes_before = self.ext.get_trie_nodes_count(); - let evicted_ptr = self.ext.storage_get(&key)?; + // For storage write, we need to first perform a read on the key to calculate the TTN cost. 
+ // This storage_get must be performed through trie instead of through FlatStorage + let evicted_ptr = self.ext.storage_get(&key, StorageGetMode::Trie)?; let evicted = Self::deref_value(&mut self.gas_counter, storage_write_evicted_byte, evicted_ptr)?; let nodes_delta = self.ext.get_trie_nodes_count() - nodes_before; @@ -2486,7 +2488,10 @@ impl<'a> VMLogic<'a> { } self.gas_counter.pay_per(storage_read_key_byte, key.len() as u64)?; let nodes_before = self.ext.get_trie_nodes_count(); - let read = self.ext.storage_get(&key); + #[cfg(feature = "protocol_feature_flat_state")] + let read = self.ext.storage_get(&key, StorageGetMode::FlatStorage); + #[cfg(not(feature = "protocol_feature_flat_state"))] + let read = self.ext.storage_get(&key, StorageGetMode::Trie); let nodes_delta = self.ext.get_trie_nodes_count() - nodes_before; self.gas_counter.add_trie_fees(&nodes_delta)?; let read = Self::deref_value(&mut self.gas_counter, storage_read_value_byte, read?)?; @@ -2544,7 +2549,9 @@ impl<'a> VMLogic<'a> { } self.gas_counter.pay_per(storage_remove_key_byte, key.len() as u64)?; let nodes_before = self.ext.get_trie_nodes_count(); - let removed_ptr = self.ext.storage_get(&key)?; + // To delete a key, we need to first perform a read on the key to calculate the TTN cost. 
+ // This storage_get must be performed through trie instead of through FlatStorage + let removed_ptr = self.ext.storage_get(&key, StorageGetMode::Trie)?; let removed = Self::deref_value(&mut self.gas_counter, storage_remove_ret_value_byte, removed_ptr)?; diff --git a/runtime/near-vm-logic/src/mocks/mock_external.rs b/runtime/near-vm-logic/src/mocks/mock_external.rs index 875a371b4a9..f72908788cf 100644 --- a/runtime/near-vm-logic/src/mocks/mock_external.rs +++ b/runtime/near-vm-logic/src/mocks/mock_external.rs @@ -1,4 +1,4 @@ -use crate::{External, ValuePtr}; +use crate::{External, StorageGetMode, ValuePtr}; use near_primitives::hash::{hash, CryptoHash}; use near_primitives::types::TrieNodesCount; use near_primitives_core::types::{AccountId, Balance}; @@ -49,7 +49,7 @@ impl External for MockedExternal { Ok(()) } - fn storage_get(&self, key: &[u8]) -> Result>> { + fn storage_get(&self, key: &[u8], _mode: StorageGetMode) -> Result>> { Ok(self .fake_trie .get(key) diff --git a/runtime/near-vm-logic/src/tests/storage_read_write.rs b/runtime/near-vm-logic/src/tests/storage_read_write.rs index 458138ee3d5..a64430759ea 100644 --- a/runtime/near-vm-logic/src/tests/storage_read_write.rs +++ b/runtime/near-vm-logic/src/tests/storage_read_write.rs @@ -1,6 +1,6 @@ use crate::tests::fixtures::get_context; use crate::tests::vm_logic_builder::VMLogicBuilder; -use crate::External; +use crate::{External, StorageGetMode}; #[test] fn test_storage_write_with_register() { @@ -15,7 +15,7 @@ fn test_storage_write_with_register() { logic.storage_write(u64::MAX, 1 as _, u64::MAX, 2 as _, 0).expect("storage write ok"); - let value_ptr = logic_builder.ext.storage_get(key).unwrap().unwrap(); + let value_ptr = logic_builder.ext.storage_get(key, StorageGetMode::Trie).unwrap().unwrap(); assert_eq!(value_ptr.deref().unwrap(), val.to_vec()); } diff --git a/runtime/near-vm-runner/src/tests/ts_contract.rs b/runtime/near-vm-runner/src/tests/ts_contract.rs index e69a7e2143b..58aaf5f2ac6 100644 --- 
a/runtime/near-vm-runner/src/tests/ts_contract.rs +++ b/runtime/near-vm-runner/src/tests/ts_contract.rs @@ -3,7 +3,7 @@ use near_primitives::runtime::fees::RuntimeFeesConfig; use near_vm_errors::{FunctionCallError, HostError}; use near_vm_logic::mocks::mock_external::MockedExternal; use near_vm_logic::types::ReturnData; -use near_vm_logic::{External, VMConfig}; +use near_vm_logic::{External, StorageGetMode, VMConfig}; use crate::tests::{create_context, with_vm_variants, LATEST_PROTOCOL_VERSION}; use crate::vm_kind::VMKind; @@ -55,7 +55,7 @@ pub fn test_ts_contract() { .expect("bad failure"); // Verify by looking directly into the storage of the host. { - let res = fake_external.storage_get(b"foo"); + let res = fake_external.storage_get(b"foo", StorageGetMode::Trie); let value_ptr = res.unwrap().unwrap(); let value = value_ptr.deref().unwrap(); let value = String::from_utf8(value).unwrap(); diff --git a/runtime/runtime/Cargo.toml b/runtime/runtime/Cargo.toml index 931625698b4..b40bb6a34e8 100644 --- a/runtime/runtime/Cargo.toml +++ b/runtime/runtime/Cargo.toml @@ -36,7 +36,7 @@ near-vm-runner = { path = "../../runtime/near-vm-runner" } [features] default = [] dump_errors_schema = ["near-vm-errors/dump_errors_schema"] -protocol_feature_flat_state = ["near-store/protocol_feature_flat_state"] +protocol_feature_flat_state = ["near-store/protocol_feature_flat_state", "near-vm-logic/protocol_feature_flat_state"] no_cpu_compatibility_checks = ["near-vm-runner/no_cpu_compatibility_checks"] no_cache = [ diff --git a/runtime/runtime/src/ext.rs b/runtime/runtime/src/ext.rs index cd240ababe0..230ef0da4c5 100644 --- a/runtime/runtime/src/ext.rs +++ b/runtime/runtime/src/ext.rs @@ -7,9 +7,9 @@ use near_primitives::types::{ }; use near_primitives::utils::create_data_id; use near_primitives::version::ProtocolVersion; -use near_store::{get_code, TrieUpdate, TrieUpdateValuePtr}; +use near_store::{get_code, KeyLookupMode, TrieUpdate, TrieUpdateValuePtr}; use near_vm_errors::{AnyError, 
VMLogicError}; -use near_vm_logic::{External, ValuePtr}; +use near_vm_logic::{External, StorageGetMode, ValuePtr}; pub struct RuntimeExt<'a> { trie_update: &'a mut TrieUpdate, @@ -111,10 +111,18 @@ impl<'a> External for RuntimeExt<'a> { Ok(()) } - fn storage_get<'b>(&'b self, key: &[u8]) -> ExtResult>> { + fn storage_get<'b>( + &'b self, + key: &[u8], + mode: StorageGetMode, + ) -> ExtResult>> { let storage_key = self.create_storage_key(key); + let mode = match mode { + StorageGetMode::FlatStorage => KeyLookupMode::FlatStorage, + StorageGetMode::Trie => KeyLookupMode::Trie, + }; self.trie_update - .get_ref(&storage_key) + .get_ref(&storage_key, mode) .map_err(wrap_storage_error) .map(|option| option.map(|ptr| Box::new(RuntimeExtValuePtr(ptr)) as Box<_>)) } @@ -127,7 +135,10 @@ impl<'a> External for RuntimeExt<'a> { fn storage_has_key(&mut self, key: &[u8]) -> ExtResult { let storage_key = self.create_storage_key(key); - self.trie_update.get_ref(&storage_key).map(|x| x.is_some()).map_err(wrap_storage_error) + self.trie_update + .get_ref(&storage_key, KeyLookupMode::FlatStorage) + .map(|x| x.is_some()) + .map_err(wrap_storage_error) } fn storage_remove_subtree(&mut self, prefix: &[u8]) -> ExtResult<()> { From 4c4b92c4f29bac1d3dc47b9be48016cef89e8d6c Mon Sep 17 00:00:00 2001 From: Jakob Meier Date: Fri, 21 Oct 2022 07:24:59 +0100 Subject: [PATCH 005/103] feat: limit trie cache by memory consumption (#7749) Instead of checking the number of values and their sizes, the caches are now limited by the actual (approximated) memory consumption. This changes what `total_size` in `TrieCacheInner` means, which is also observable through Prometheus metrics. Existing configuration works with slightly altered effects. Number of entries convert to an implicit size limit. Since the explicit default size limit currently is 3GB and the default max entries is set 50k, the implicit limit = 50k * 1000B = 50MB is stronger. 
This still limits the number of largest entries to 50k but allows the cache to be filled with more entries when the values are smaller. For shard 3, however, where the number of entries is set to 45M in code, the memory limit of 3GB is active. Since we change how this limit is calculated we will see fewer entries cached with this change. Shard 3 should still be okay since we have a prefetcher in place now that works even when the cache is empty. --- CHANGELOG.md | 4 ++ core/store/src/trie/config.rs | 33 +++++++------ core/store/src/trie/trie_storage.rs | 72 +++++++++++++++++++++-------- core/store/src/trie/trie_tests.rs | 2 +- 4 files changed, 78 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c45bcb83db..f5a1de60876 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,6 +48,10 @@ with [#7661](https://github.com/near/nearcore/pull/7661). Configurable in `config.json` using `store.enable_receipt_prefetching`. * neard cmd can now verify proofs from JSON files. +* In storage configuration, the value `trie_cache_capacities` now is no longer + a hard limit but instead sets a memory consumption limit. For large trie nodes, + the limits are close to equivalent. For small values, there can now fit more + in the cache than previously. ## 1.29.0 [2022-08-15] diff --git a/core/store/src/trie/config.rs b/core/store/src/trie/config.rs index 18ff5f1adec..e6025117f46 100644 --- a/core/store/src/trie/config.rs +++ b/core/store/src/trie/config.rs @@ -1,3 +1,4 @@ +use crate::trie::trie_storage::TrieCacheInner; use crate::StoreConfig; use near_primitives::shard_layout::ShardUId; use near_primitives::types::AccountId; @@ -42,11 +43,7 @@ pub struct TrieConfig { pub struct ShardCacheConfig { /// Shard cache capacity in number of trie nodes. pub default_max_entries: u64, - /// Limits the sum of all cached value sizes. - /// - /// This is useful to limit total memory consumption. However, crucially this - /// is not a hard limit. 
It only limits the sum of all cached values, not - /// factoring in the overhead for each entry. + /// Limits the memory consumption for the cache. pub default_max_total_bytes: u64, /// Overrides `default_max_entries` per shard. pub override_max_entries: HashMap, @@ -76,12 +73,6 @@ impl TrieConfig { this } - /// Shard cache capacity in number of trie nodes. - pub fn shard_cache_capacity(&self, shard_uid: ShardUId, is_view: bool) -> u64 { - if is_view { &self.view_shard_cache_config } else { &self.shard_cache_config } - .capacity(shard_uid) - } - /// Shard cache capacity in total bytes. pub fn shard_cache_total_size_limit(&self, shard_uid: ShardUId, is_view: bool) -> u64 { if is_view { &self.view_shard_cache_config } else { &self.shard_cache_config } @@ -106,15 +97,29 @@ impl TrieConfig { } impl ShardCacheConfig { + // TODO(#7894): Remove this when `trie_cache_capacities` is removed from config. fn capacity(&self, shard_uid: ShardUId) -> u64 { self.override_max_entries.get(&shard_uid).cloned().unwrap_or(self.default_max_entries) } fn total_size_limit(&self, shard_uid: ShardUId) -> u64 { - self.override_max_total_bytes + let explicit_limit = self + .override_max_total_bytes .get(&shard_uid) - .cloned() - .unwrap_or(self.default_max_total_bytes) + .copied() + .unwrap_or(self.default_max_total_bytes); + // As long as `trie_cache_capacities` is a config option, it should be respected. + // We no longer commit to a hard limit on this. But we make sure that the old + // worst-case assumption of how much memory would be consumed still works. + // Specifically, the old calculation ignored `PER_ENTRY_OVERHEAD` and used + // `max_cached_value_size()` only to figure out a good value for how many + // nodes we want in the cache at most. + // This implicit limit should result in the same max number of nodes and same max memory + // consumption as the old config. + // TODO(#7894): Remove this when `trie_cache_capacities` is removed from config.
+ let implicit_limit = self.capacity(shard_uid) + * (TrieCacheInner::PER_ENTRY_OVERHEAD + TrieConfig::max_cached_value_size() as u64); + explicit_limit.min(implicit_limit) } } diff --git a/core/store/src/trie/trie_storage.rs b/core/store/src/trie/trie_storage.rs index d5337f35da8..2c85d01f52f 100644 --- a/core/store/src/trie/trie_storage.rs +++ b/core/store/src/trie/trie_storage.rs @@ -90,14 +90,18 @@ struct TrieCacheMetrics { } impl TrieCacheInner { + /// Assumed number of bytes used to store an entry in the cache. + /// + /// 100 bytes is an approximation based on lru 0.7.5. + pub(crate) const PER_ENTRY_OVERHEAD: u64 = 100; + pub(crate) fn new( - cache_capacity: usize, deletions_queue_capacity: usize, total_size_limit: u64, shard_id: ShardId, is_view: bool, ) -> Self { - assert!(cache_capacity > 0 && total_size_limit > 0); + assert!(total_size_limit > 0); // `itoa` is much faster for printing shard_id to a string than trivial alternatives. let mut buffer = itoa::Buffer::new(); let shard_id_str = buffer.format(shard_id); @@ -116,7 +120,7 @@ impl TrieCacheInner { .with_label_values(&metrics_labels), }; Self { - cache: LruCache::new(cache_capacity), + cache: LruCache::unbounded(), deletions: BoundedQueue::new(deletions_queue_capacity), total_size: 0, total_size_limit, @@ -143,7 +147,7 @@ impl TrieCacheInner { Some(key) => match self.cache.pop(&key) { Some(value) => { self.metrics.shard_cache_pop_hits.inc(); - self.total_size -= value.len() as u64; + self.remove_value_of_size(value.len()); continue; } None => { @@ -157,15 +161,15 @@ impl TrieCacheInner { self.metrics.shard_cache_pop_lru.inc(); let (_, value) = self.cache.pop_lru().expect("Cannot fail because total size capacity is > 0"); - self.total_size -= value.len() as u64; + self.remove_value_of_size(value.len()); } // Add value to the cache. 
- self.total_size += value.len() as u64; + self.add_value_of_size(value.len()); match self.cache.push(key, value) { Some((evicted_key, evicted_value)) => { log_assert!(key == evicted_key, "LRU cache with shard_id = {}, is_view = {} can't be full before inserting key {}", self.shard_id, self.is_view, key); - self.total_size -= evicted_value.len() as u64; + self.remove_value_of_size(evicted_value.len()); } None => {} }; @@ -182,7 +186,7 @@ impl TrieCacheInner { Some(key_to_delete) => match self.cache.pop(&key_to_delete) { Some(evicted_value) => { self.metrics.shard_cache_pop_hits.inc(); - self.total_size -= evicted_value.len() as u64; + self.remove_value_of_size(evicted_value.len()); Some((key_to_delete, evicted_value)) } None => { @@ -198,13 +202,29 @@ impl TrieCacheInner { } } + /// Number of currently cached entries. pub fn len(&self) -> usize { self.cache.len() } + /// Account consumed memory for a new entry in the cache. + pub(crate) fn add_value_of_size(&mut self, len: usize) { + self.total_size += Self::entry_size(len); + } + + /// Remove consumed memory for an entry in the cache. + pub(crate) fn remove_value_of_size(&mut self, len: usize) { + self.total_size -= Self::entry_size(len); + } + + /// Approximate memory consumption of LRU cache. pub fn current_total_size(&self) -> u64 { self.total_size } + + fn entry_size(len: usize) -> u64 { + len as u64 + Self::PER_ENTRY_OVERHEAD + } } /// Wrapper over LruCache to handle concurrent access. 
@@ -213,11 +233,9 @@ pub struct TrieCache(pub(crate) Arc>); impl TrieCache { pub fn new(config: &TrieConfig, shard_uid: ShardUId, is_view: bool) -> Self { - let capacity = config.shard_cache_capacity(shard_uid, is_view); let total_size_limit = config.shard_cache_total_size_limit(shard_uid, is_view); let queue_capacity = config.deletions_queue_capacity(); Self(Arc::new(Mutex::new(TrieCacheInner::new( - capacity as usize, queue_capacity, total_size_limit, shard_uid.shard_id(), @@ -654,25 +672,27 @@ mod trie_cache_tests { #[test] fn test_size_limit() { - let mut cache = TrieCacheInner::new(100, 100, 5, 0, false); + let value_size_sum = 5; + let memory_overhead = 2 * TrieCacheInner::PER_ENTRY_OVERHEAD; + let mut cache = TrieCacheInner::new(100, value_size_sum + memory_overhead, 0, false); // Add three values. Before each put, condition on total size should not be triggered. put_value(&mut cache, &[1, 1]); - assert_eq!(cache.total_size, 2); + assert_eq!(cache.current_total_size(), 2 + TrieCacheInner::PER_ENTRY_OVERHEAD); put_value(&mut cache, &[1, 1, 1]); - assert_eq!(cache.total_size, 5); + assert_eq!(cache.current_total_size(), 5 + 2 * TrieCacheInner::PER_ENTRY_OVERHEAD); put_value(&mut cache, &[1]); - assert_eq!(cache.total_size, 6); + assert_eq!(cache.current_total_size(), 6 + 3 * TrieCacheInner::PER_ENTRY_OVERHEAD); - // Add one of previous values. LRU value should be evicted. + // Add one of previous values. 2 LRU values should be evicted. put_value(&mut cache, &[1, 1, 1]); - assert_eq!(cache.total_size, 4); + assert_eq!(cache.current_total_size(), 4 + 2 * TrieCacheInner::PER_ENTRY_OVERHEAD); assert_eq!(cache.cache.pop_lru(), Some((hash(&[1]), vec![1].into()))); assert_eq!(cache.cache.pop_lru(), Some((hash(&[1, 1, 1]), vec![1, 1, 1].into()))); } #[test] fn test_deletions_queue() { - let mut cache = TrieCacheInner::new(100, 2, 100, 0, false); + let mut cache = TrieCacheInner::new(2, 1000, 0, false); // Add two values to the cache. 
put_value(&mut cache, &[1]); put_value(&mut cache, &[1, 1]); @@ -686,9 +706,12 @@ mod trie_cache_tests { assert_eq!(cache.pop(&hash(&[1])), Some((hash(&[1]), vec![1].into()))); } + /// test implicit capacity limit imposed by memory limit #[test] fn test_cache_capacity() { - let mut cache = TrieCacheInner::new(2, 100, 100, 0, false); + let capacity = 2; + let total_size_limit = TrieCacheInner::PER_ENTRY_OVERHEAD * capacity; + let mut cache = TrieCacheInner::new(100, total_size_limit, 0, false); put_value(&mut cache, &[1]); put_value(&mut cache, &[2]); put_value(&mut cache, &[3]); @@ -697,4 +720,17 @@ mod trie_cache_tests { assert!(cache.cache.contains(&hash(&[2]))); assert!(cache.cache.contains(&hash(&[3]))); } + + #[test] + fn test_small_memory_limit() { + let total_size_limit = 1; + let mut cache = TrieCacheInner::new(100, total_size_limit, 0, false); + put_value(&mut cache, &[1, 2, 3]); + put_value(&mut cache, &[2, 3, 4]); + put_value(&mut cache, &[3, 4, 5]); + + assert!(!cache.cache.contains(&hash(&[1, 2, 3]))); + assert!(!cache.cache.contains(&hash(&[2, 3, 4]))); + assert!(cache.cache.contains(&hash(&[3, 4, 5]))); + } } diff --git a/core/store/src/trie/trie_tests.rs b/core/store/src/trie/trie_tests.rs index 469c025aaa2..0b17a318866 100644 --- a/core/store/src/trie/trie_tests.rs +++ b/core/store/src/trie/trie_tests.rs @@ -348,7 +348,7 @@ mod caching_storage_tests { let shard_uid = ShardUId::single_shard(); let store = create_store_with_values(&values, shard_uid); let mut trie_config = TrieConfig::default(); - trie_config.shard_cache_config.override_max_entries.insert(shard_uid, shard_cache_size); + trie_config.shard_cache_config.override_max_total_bytes.insert(shard_uid, shard_cache_size); let trie_cache = TrieCache::new(&trie_config, shard_uid, false); let trie_caching_storage = TrieCachingStorage::new(store, trie_cache.clone(), shard_uid, false, None); From 485d840d8829b97083655d3f107dda6b28a4a02d Mon Sep 17 00:00:00 2001 From: Marcelo Diop-Gonzalez Date: Fri, 
21 Oct 2022 11:42:06 -0400 Subject: [PATCH 006/103] feat: add a transaction mirror binary (#7183) This adds code that mirrors traffic from a source chain (e.g. mainnet or testnet) to a test chain with genesis state forked from the source chain. The goal is to produce traffic that looks like source chain traffic. So in a mocknet test where we fork mainnet state for example, we can then actually observe what happens when we subsequently get traffic equivalent to mainnet traffic after the fork point. For more info, see the README in this commit. --- Cargo.lock | 58 ++ Cargo.toml | 3 + core/chain-configs/src/genesis_config.rs | 2 +- core/chain-configs/src/lib.rs | 4 +- neard/Cargo.toml | 1 + neard/src/cli.rs | 10 + pytest/lib/key.py | 7 + pytest/tools/mirror/test.py | 470 ++++++++++ tools/mirror/Cargo.toml | 43 + tools/mirror/README.md | 59 ++ tools/mirror/src/chain_tracker.rs | 430 +++++++++ tools/mirror/src/cli.rs | 133 +++ tools/mirror/src/genesis.rs | 83 ++ tools/mirror/src/key_mapping.rs | 111 +++ tools/mirror/src/lib.rs | 1046 ++++++++++++++++++++++ tools/mirror/src/metrics.rs | 21 + tools/mirror/src/secret.rs | 85 ++ 17 files changed, 2563 insertions(+), 3 deletions(-) create mode 100755 pytest/tools/mirror/test.py create mode 100644 tools/mirror/Cargo.toml create mode 100644 tools/mirror/README.md create mode 100644 tools/mirror/src/chain_tracker.rs create mode 100644 tools/mirror/src/cli.rs create mode 100644 tools/mirror/src/genesis.rs create mode 100644 tools/mirror/src/key_mapping.rs create mode 100644 tools/mirror/src/lib.rs create mode 100644 tools/mirror/src/metrics.rs create mode 100644 tools/mirror/src/secret.rs diff --git a/Cargo.lock b/Cargo.lock index 568e10b6629..88db047e438 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1477,6 +1477,7 @@ checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506" dependencies = [ "block-buffer 0.10.2", "crypto-common", + "subtle", ] [[package]] @@ -2107,6 +2108,24 @@ dependencies = [ 
"proc-macro-hack", ] +[[package]] +name = "hkdf" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "791a029f6b9fc27657f6f188ec6e5e43f6911f6f878e0dc5501396e09809d437" +dependencies = [ + "hmac", +] + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest 0.10.3", +] + [[package]] name = "http" version = "0.2.7" @@ -3157,6 +3176,44 @@ dependencies = [ "serde_json", ] +[[package]] +name = "near-mirror" +version = "0.0.0" +dependencies = [ + "actix", + "anyhow", + "borsh", + "bs58", + "clap 3.1.18", + "ed25519-dalek", + "hex", + "hkdf", + "near-chain-configs", + "near-client", + "near-client-primitives", + "near-crypto", + "near-indexer", + "near-indexer-primitives", + "near-network", + "near-o11y", + "near-primitives", + "near-primitives-core", + "near-store", + "nearcore", + "once_cell", + "openssl-probe", + "rand_core 0.5.1", + "rocksdb", + "secp256k1", + "serde", + "serde_json", + "sha2 0.10.2", + "strum", + "thiserror", + "tokio", + "tracing", +] + [[package]] name = "near-network" version = "0.0.0" @@ -3615,6 +3672,7 @@ dependencies = [ "futures", "near-chain-configs", "near-jsonrpc-primitives", + "near-mirror", "near-o11y", "near-performance-metrics", "near-primitives", diff --git a/Cargo.toml b/Cargo.toml index 3f6c0ae6df9..60a4ac317f8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,6 +45,7 @@ members = [ "tools/chainsync-loadtest", "tools/delay-detector", "tools/indexer/example", + "tools/mirror", "tools/mock-node", "tools/restaked", "tools/rpctypegen/core", @@ -111,6 +112,7 @@ fs2 = "0.4" futures = "0.3.5" futures-util = "0.3" hex = { version = "0.4.2", features = ["serde"] } +hkdf = "0.12.3" hyper = { version = "0.14", features = ["full"] } hyper-tls = "0.5.0" im = "15" @@ -148,6 +150,7 @@ protobuf-codegen = "3.0.1" quote = "1.0" rand = "0.8.5" 
rand_chacha = "0.3.1" +rand_core = "0.5" rand_hc = "0.3.1" rand_xorshift = "0.3" rayon = "1.5" diff --git a/core/chain-configs/src/genesis_config.rs b/core/chain-configs/src/genesis_config.rs index 41e86418aa5..3d1f9f8ad72 100644 --- a/core/chain-configs/src/genesis_config.rs +++ b/core/chain-configs/src/genesis_config.rs @@ -393,7 +393,7 @@ impl<'de, F: FnMut(StateRecord)> DeserializeSeed<'de> for RecordsProcessor<&'_ m } } -fn stream_records_from_file( +pub fn stream_records_from_file( reader: impl Read, mut callback: impl FnMut(StateRecord), ) -> serde_json::Result<()> { diff --git a/core/chain-configs/src/lib.rs b/core/chain-configs/src/lib.rs index c698b3bf881..ab2fcb53ca7 100644 --- a/core/chain-configs/src/lib.rs +++ b/core/chain-configs/src/lib.rs @@ -7,6 +7,6 @@ pub use client_config::{ MIN_GC_NUM_EPOCHS_TO_KEEP, TEST_STATE_SYNC_TIMEOUT, }; pub use genesis_config::{ - get_initial_supply, Genesis, GenesisChangeConfig, GenesisConfig, GenesisRecords, - GenesisValidationMode, ProtocolConfig, ProtocolConfigView, + get_initial_supply, stream_records_from_file, Genesis, GenesisChangeConfig, GenesisConfig, + GenesisRecords, GenesisValidationMode, ProtocolConfig, ProtocolConfigView, }; diff --git a/neard/Cargo.toml b/neard/Cargo.toml index 886461cb545..ffd66616724 100644 --- a/neard/Cargo.toml +++ b/neard/Cargo.toml @@ -34,6 +34,7 @@ tracing.workspace = true nearcore = { path = "../nearcore" } near-chain-configs = { path = "../core/chain-configs" } near-jsonrpc-primitives = { path = "../chain/jsonrpc-primitives" } +near-mirror = { path = "../tools/mirror" } near-primitives = { path = "../core/primitives" } near-performance-metrics = { path = "../utils/near-performance-metrics" } near-state-viewer = { path = "../tools/state-viewer", package = "state-viewer" } diff --git a/neard/src/cli.rs b/neard/src/cli.rs index 3e80ac999f6..3b6ff113433 100644 --- a/neard/src/cli.rs +++ b/neard/src/cli.rs @@ -3,6 +3,7 @@ use anyhow::Context; use clap::{Args, Parser}; use 
near_chain_configs::GenesisValidationMode; use near_jsonrpc_primitives::types::light_client::RpcLightClientExecutionProofResponse; +use near_mirror::MirrorCommand; use near_o11y::tracing_subscriber::EnvFilter; use near_o11y::{ default_subscriber, default_subscriber_with_opentelemetry, BuildEnvFilterError, @@ -93,6 +94,9 @@ impl NeardCmd { NeardSubCommand::VerifyProof(cmd) => { cmd.run(); } + NeardSubCommand::Mirror(cmd) => { + cmd.run()?; + } }; Ok(()) } @@ -104,6 +108,8 @@ pub(crate) enum RunError { EnvFilter(#[source] BuildEnvFilterError), #[error("could not install a rayon thread pool")] RayonInstall(#[source] rayon::ThreadPoolBuildError), + #[error(transparent)] + Other(#[from] anyhow::Error), } #[derive(Parser)] @@ -189,6 +195,10 @@ pub(super) enum NeardSubCommand { /// Verify proofs #[clap(alias = "verify_proof")] VerifyProof(VerifyProofSubCommand), + + /// Mirror transactions from a source chain to a test chain with state forked + /// from it, reproducing traffic and state as closely as possible. 
+ Mirror(MirrorCommand), } #[derive(Parser)] diff --git a/pytest/lib/key.py b/pytest/lib/key.py index 9dbcc0d5698..9d99385de62 100644 --- a/pytest/lib/key.py +++ b/pytest/lib/key.py @@ -17,6 +17,13 @@ def __init__(self, account_id: str, pk: str, sk: str) -> None: self.pk = pk self.sk = sk + @classmethod + def from_random(cls, account_id: str) -> 'Key': + keys = ed25519.create_keypair(entropy=os.urandom) + sk = 'ed25519:' + base58.b58encode(keys[0].to_bytes()).decode('ascii') + pk = 'ed25519:' + base58.b58encode(keys[1].to_bytes()).decode('ascii') + return cls(account_id, pk, sk) + @classmethod def implicit_account(cls) -> 'Key': keys = ed25519.create_keypair(entropy=os.urandom) diff --git a/pytest/tools/mirror/test.py b/pytest/tools/mirror/test.py new file mode 100755 index 00000000000..b51f9186d97 --- /dev/null +++ b/pytest/tools/mirror/test.py @@ -0,0 +1,470 @@ +#!/usr/bin/env python3 + +import sys, time, base58, random +import atexit +import base58 +import json +import os +import pathlib +import shutil +import signal +import subprocess + +sys.path.append(str(pathlib.Path(__file__).resolve().parents[2] / 'lib')) + +from cluster import init_cluster, spin_up_node, load_config +from configured_logger import logger +from mocknet import create_genesis_file +import transaction +import utils +import key + +# This sets up an environment to test the tools/mirror process. It starts a localnet with a few validators +# and waits for some blocks to be produced. Then we fork the state and start a new chain from that, and +# start the mirror process that should mirror transactions from the source chain to the target chain. 
+# Halfway through we restart it to make sure that it still works properly when restarted + +TIMEOUT = 240 +NUM_VALIDATORS = 4 +TARGET_VALIDATORS = ['foo0', 'foo1', 'foo2'] +MIRROR_DIR = 'test-mirror' + + +def mkdir_clean(dirname): + try: + dirname.mkdir() + except FileExistsError: + shutil.rmtree(dirname) + dirname.mkdir() + + +def dot_near(): + return pathlib.Path.home() / '.near' + + +def ordinal_to_port(port, ordinal): + return f'0.0.0.0:{port + 10 + ordinal}' + + +def init_target_dir(neard, home, ordinal, validator_account=None): + mkdir_clean(home) + + try: + subprocess.check_output([neard, '--home', home, 'init'], + stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as e: + sys.exit(f'"neard init" command failed: output: {e.stdout}') + shutil.copy(dot_near() / 'test0/config.json', home / 'config.json') + shutil.copy(dot_near() / 'test0/forked/genesis.json', home / 'genesis.json') + shutil.copy(dot_near() / 'test0/forked/records.json', home / 'records.json') + + with open(home / 'config.json', 'r') as f: + config = json.load(f) + config['genesis_records_file'] = 'records.json' + config['network']['addr'] = ordinal_to_port(24567, ordinal) + config['rpc']['addr'] = ordinal_to_port(3030, ordinal) + with open(home / 'config.json', 'w') as f: + json.dump(config, f) + + if validator_account is None: + os.remove(home / 'validator_key.json') + else: + # this key and the suffix -load-test.near are hardcoded in create_genesis_file() + with open(home / 'validator_key.json', 'w') as f: + json.dump( + { + 'account_id': + f'{validator_account + "-load-test.near"}', + 'public_key': + 'ed25519:76NVkDErhbP1LGrSAf5Db6BsFJ6LBw6YVA4BsfTBohmN', + 'secret_key': + 'ed25519:3cCk8KUWBySGCxBcn1syMoY5u73wx5eaPLRbQcMi23LwBA3aLsqEbA33Ww1bsJaFrchmDciGe9otdn45SrDSkow2' + }, f) + + +def init_target_dirs(neard): + ordinal = NUM_VALIDATORS + 1 + dirs = [] + + for account_id in TARGET_VALIDATORS: + home = dot_near() / f'test_target_{account_id}' + dirs.append(str(home)) + 
init_target_dir(neard, home, ordinal, validator_account=account_id) + ordinal += 1 + + observer = dot_near() / f'{MIRROR_DIR}/target' + init_target_dir(neard, observer, ordinal, validator_account=None) + shutil.copy(dot_near() / 'test0/output/mirror-secret.json', + observer / 'mirror-secret.json') + return dirs, observer + + +def create_forked_chain(config, near_root): + binary_name = config.get('binary_name', 'neard') + neard = os.path.join(near_root, binary_name) + try: + subprocess.check_output([ + neard, "--home", + dot_near() / 'test0', "view-state", "dump-state", "--stream" + ], + stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as e: + sys.exit(f'"dump-state" command failed: output: {e.stdout}') + try: + subprocess.check_output([ + neard, + 'mirror', + 'prepare', + '--records-file-in', + dot_near() / 'test0/output/records.json', + '--records-file-out', + dot_near() / 'test0/output/mirror-records.json', + '--secret-file-out', + dot_near() / 'test0/output/mirror-secret.json', + ], + stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as e: + sys.exit(f'"mirror prepare" command failed: output: {e.stdout}') + + os.mkdir(dot_near() / 'test0/forked') + genesis_filename_in = dot_near() / 'test0/output/genesis.json' + genesis_filename_out = dot_near() / 'test0/forked/genesis.json' + records_filename_in = dot_near() / 'test0/output/mirror-records.json' + records_filename_out = dot_near() / 'test0/forked/records.json' + create_genesis_file(TARGET_VALIDATORS, + genesis_filename_in=genesis_filename_in, + genesis_filename_out=genesis_filename_out, + records_filename_in=records_filename_in, + records_filename_out=records_filename_out, + rpc_node_names=[], + chain_id='foonet', + append=True, + epoch_length=20, + node_pks=None, + increasing_stakes=0.0, + num_seats=len(TARGET_VALIDATORS)) + return init_target_dirs(neard) + + +def init_mirror_dir(home, source_boot_node): + mkdir_clean(dot_near() / MIRROR_DIR) + os.rename(home, dot_near() / 
f'{MIRROR_DIR}/source') + ordinal = NUM_VALIDATORS + with open(dot_near() / f'{MIRROR_DIR}/source/config.json', 'r') as f: + config = json.load(f) + config['network']['boot_nodes'] = source_boot_node.addr_with_pk() + config['network']['addr'] = ordinal_to_port(24567, ordinal) + config['rpc']['addr'] = ordinal_to_port(3030, ordinal) + with open(dot_near() / f'{MIRROR_DIR}/source/config.json', 'w') as f: + json.dump(config, f) + + +def mirror_cleanup(process): + process.send_signal(signal.SIGINT) + try: + process.wait(5) + except: + process.kill() + logger.error('can\'t kill mirror process') + + +def start_mirror(near_root, source_home, target_home, boot_node): + env = os.environ.copy() + env["RUST_LOG"] = "actix_web=warn,mio=warn,tokio_util=warn,actix_server=warn,actix_http=warn," + env.get( + "RUST_LOG", "debug") + with open(dot_near() / f'{MIRROR_DIR}/stdout', 'ab') as stdout, \ + open(dot_near() / f'{MIRROR_DIR}/stderr', 'ab') as stderr: + process = subprocess.Popen([ + os.path.join(near_root, 'neard'), 'mirror', 'run', "--source-home", + source_home, "--target-home", target_home, '--secret-file', + target_home / 'mirror-secret.json' + ], + stdin=subprocess.DEVNULL, + stdout=stdout, + stderr=stderr, + env=env) + logger.info("Started mirror process") + atexit.register(mirror_cleanup, process) + with open(target_home / 'config.json', 'r') as f: + config = json.load(f) + config['network']['boot_nodes'] = boot_node.addr_with_pk() + with open(target_home / 'config.json', 'w') as f: + json.dump(config, f) + return process + + +# we'll test out adding an access key and then sending txs signed with it +# since that hits some codepaths we want to test +def send_add_access_key(node, creator_key, nonce, block_hash): + k = key.Key.from_random('test0') + action = transaction.create_full_access_key_action(k.decoded_pk()) + tx = transaction.sign_and_serialize_transaction('test0', nonce, [action], + block_hash, 'test0', + creator_key.decoded_pk(), + creator_key.decoded_sk()) + 
node.send_tx(tx) + return k + + +def create_subaccount(node, signer_key, nonce, block_hash): + k = key.Key.from_random('foo.' + signer_key.account_id) + actions = [] + actions.append(transaction.create_create_account_action()) + actions.append(transaction.create_full_access_key_action(k.decoded_pk())) + actions.append(transaction.create_payment_action(10**24)) + # add an extra one just to exercise some more corner cases + actions.append( + transaction.create_full_access_key_action( + key.Key.from_random(k.account_id).decoded_pk())) + + tx = transaction.sign_and_serialize_transaction(k.account_id, nonce, + actions, block_hash, + signer_key.account_id, + signer_key.decoded_pk(), + signer_key.decoded_sk()) + node.send_tx(tx) + return k + + +# a key that we added with an AddKey tx or implicit account transfer. +# just for nonce handling convenience +class AddedKey: + + def __init__(self, key): + self.nonce = None + self.key = key + + def send_if_inited(self, node, transfers, block_hash): + if self.nonce is None: + self.nonce = node.get_nonce_for_pk(self.key.account_id, self.key.pk) + + if self.nonce is not None: + for (receiver_id, amount) in transfers: + self.nonce += 1 + tx = transaction.sign_payment_tx(self.key, receiver_id, amount, + self.nonce, block_hash) + node.send_tx(tx) + + +class ImplicitAccount: + + def __init__(self): + self.key = AddedKey(key.Key.implicit_account()) + + def account_id(self): + return self.key.key.account_id + + def transfer(self, node, sender_key, amount, block_hash, nonce): + tx = transaction.sign_payment_tx(sender_key, self.account_id(), amount, + nonce, block_hash) + node.send_tx(tx) + logger.info( + f'sent {amount} to initialize implicit account {self.account_id()}') + + def send_if_inited(self, node, transfers, block_hash): + self.key.send_if_inited(node, transfers, block_hash) + + +def count_total_txs(node, min_height=0): + total = 0 + h = node.get_latest_block().hash + while True: + block = node.get_block(h)['result'] + height = 
int(block['header']['height']) + if height < min_height: + return total + + for c in block['chunks']: + if int(c['height_included']) == height: + chunk = node.get_chunk(c['chunk_hash'])['result'] + total += len(chunk['transactions']) + + h = block['header']['prev_hash'] + if h == '11111111111111111111111111111111': + return total + + +def check_num_txs(source_node, target_node, start_time, end_source_height): + with open(os.path.join(target_node.node_dir, 'genesis.json'), 'r') as f: + genesis_height = json.load(f)['genesis_height'] + with open(os.path.join(target_node.node_dir, 'config.json'), 'r') as f: + delay = json.load(f)['consensus']['min_block_production_delay'] + block_delay = 10**9 * int(delay['secs']) + int(delay['nanos']) + block_delay = block_delay / 10**9 + + total_source_txs = count_total_txs(source_node, min_height=genesis_height) + + # start_time is the time the mirror binary was started. Give it 20 seconds to + # sync and then 50% more than min_block_production_delay for each block between + # the start and end points of the source chain. 
Not ideal to be basing a test on time + # like this but there's no real strong guarantee on when the transactions should + # make it on chain, so this is some kind of reasonable timeout + + total_time_allowed = 20 + (end_source_height - + genesis_height) * block_delay * 1.5 + time_elapsed = time.time() - start_time + if time_elapsed < total_time_allowed: + time_left = total_time_allowed - time_elapsed + logger.info( + f'waiting for {int(time_left)} seconds to allow transactions to make it to the target chain' + ) + time.sleep(time_left) + + total_target_txs = count_total_txs(target_node) + assert total_source_txs == total_target_txs, (total_source_txs, + total_target_txs) + logger.info(f'all {total_source_txs} transactions mirrored') + + +def main(): + config_changes = {} + for i in range(NUM_VALIDATORS + 1): + config_changes[i] = {"tracked_shards": [0, 1, 2, 3], "archive": True} + + config = load_config() + near_root, node_dirs = init_cluster(num_nodes=NUM_VALIDATORS, + num_observers=1, + num_shards=4, + config=config, + genesis_config_changes=[ + ["epoch_length", 10], + ], + client_config_changes=config_changes) + + nodes = [spin_up_node(config, near_root, node_dirs[0], 0)] + + init_mirror_dir(node_dirs[NUM_VALIDATORS], nodes[0]) + + for i in range(1, NUM_VALIDATORS): + nodes.append( + spin_up_node(config, near_root, node_dirs[i], i, + boot_node=nodes[0])) + + ctx = utils.TxContext([i for i in range(len(nodes))], nodes) + + implicit_account1 = ImplicitAccount() + for height, block_hash in utils.poll_blocks(nodes[0], timeout=TIMEOUT): + implicit_account1.transfer(nodes[0], nodes[0].signer_key, 10**24, + base58.b58decode(block_hash.encode('utf8')), + ctx.next_nonce) + ctx.next_nonce += 1 + break + + for height, block_hash in utils.poll_blocks(nodes[0], timeout=TIMEOUT): + block_hash_bytes = base58.b58decode(block_hash.encode('utf8')) + + implicit_account1.send_if_inited(nodes[0], [('test2', height), + ('test3', height)], + block_hash_bytes) + 
ctx.send_moar_txs(block_hash, 10, use_routing=False) + + if height > 12: + break + + nodes[0].kill() + target_node_dirs, target_observer_dir = create_forked_chain( + config, near_root) + nodes[0].start(boot_node=nodes[1]) + + ordinal = NUM_VALIDATORS + 1 + target_nodes = [ + spin_up_node(config, near_root, target_node_dirs[0], ordinal) + ] + for i in range(1, len(target_node_dirs)): + ordinal += 1 + target_nodes.append( + spin_up_node(config, + near_root, + target_node_dirs[i], + ordinal, + boot_node=target_nodes[0])) + + p = start_mirror(near_root, + dot_near() / f'{MIRROR_DIR}/source/', target_observer_dir, + target_nodes[0]) + start_time = time.time() + start_source_height = nodes[0].get_latest_block().height + restarted = False + + subaccount_key = AddedKey( + create_subaccount(nodes[0], nodes[0].signer_key, ctx.next_nonce, + block_hash_bytes)) + ctx.next_nonce += 1 + + new_key = AddedKey( + send_add_access_key(nodes[0], nodes[0].signer_key, ctx.next_nonce, + block_hash_bytes)) + ctx.next_nonce += 1 + + implicit_account2 = ImplicitAccount() + # here we are gonna send a tiny amount (1 yoctoNEAR) to the implicit account and + # then wait a bit before properly initializing it. 
This hits a corner case where the + # mirror binary needs to properly look for the second tx's outcome to find the starting + # nonce because the first one failed + implicit_account2.transfer(nodes[0], nodes[0].signer_key, 1, + block_hash_bytes, ctx.next_nonce) + ctx.next_nonce += 1 + time.sleep(2) + implicit_account2.transfer(nodes[0], nodes[0].signer_key, 10**24, + block_hash_bytes, ctx.next_nonce) + ctx.next_nonce += 1 + + for height, block_hash in utils.poll_blocks(nodes[0], timeout=TIMEOUT): + code = p.poll() + if code is not None: + assert code == 0 + break + + block_hash_bytes = base58.b58decode(block_hash.encode('utf8')) + + ctx.send_moar_txs(block_hash, 10, use_routing=False) + + implicit_account1.send_if_inited( + nodes[0], [('test2', height), ('test1', height), + (implicit_account2.account_id(), height)], + block_hash_bytes) + implicit_account2.send_if_inited( + nodes[1], [('test2', height), ('test0', height), + (implicit_account1.account_id(), height)], + block_hash_bytes) + new_key.send_if_inited(nodes[2], + [('test1', height), ('test2', height), + (implicit_account1.account_id(), height), + (implicit_account2.account_id(), height)], + block_hash_bytes) + subaccount_key.send_if_inited( + nodes[3], [('test3', height), + (implicit_account2.account_id(), height)], + block_hash_bytes) + + if not restarted and height - start_source_height >= 50: + logger.info('stopping mirror process') + p.terminate() + p.wait() + with open(dot_near() / f'{MIRROR_DIR}/stderr', 'ab') as stderr: + stderr.write( + b'<><><><><><><><><><><><> restarting <><><><><><><><><><><><><><><><><><><><>\n' + ) + stderr.write( + b'<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>\n' + ) + stderr.write( + b'<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>\n' + ) + p = start_mirror(near_root, + dot_near() / f'{MIRROR_DIR}/source/', + target_observer_dir, target_nodes[0]) + restarted = True + + if height - start_source_height >= 100: + 
break + + time.sleep(5) + # we don't need these anymore + for node in nodes[1:]: + node.kill() + check_num_txs(nodes[0], target_nodes[0], start_time, height) + + +if __name__ == '__main__': + main() diff --git a/tools/mirror/Cargo.toml b/tools/mirror/Cargo.toml new file mode 100644 index 00000000000..7137dd87ecb --- /dev/null +++ b/tools/mirror/Cargo.toml @@ -0,0 +1,43 @@ +[package] +name = "near-mirror" +version = "0.0.0" +authors.workspace = true +publish = false +# Please update rust-toolchain.toml as well when changing version here: +rust-version.workspace = true +edition.workspace = true + +[dependencies] +actix.workspace = true +anyhow.workspace = true +borsh.workspace = true +bs58.workspace = true +clap.workspace = true +ed25519-dalek.workspace = true +hex.workspace = true +hkdf.workspace = true +once_cell.workspace = true +openssl-probe.workspace = true +rand_core.workspace = true +rocksdb.workspace = true +secp256k1.workspace = true +serde.workspace = true +serde_json.workspace = true +sha2.workspace = true +strum.workspace = true +thiserror.workspace = true +tokio.workspace = true +tracing.workspace = true + +nearcore = { path = "../../nearcore" } +near-chain-configs = { path = "../../core/chain-configs" } +near-client = { path = "../../chain/client" } +near-client-primitives = { path = "../../chain/client-primitives" } +near-indexer-primitives = { path = "../../chain/indexer-primitives" } +near-indexer = { path = "../../chain/indexer" } +near-network = { path = "../../chain/network" } +near-primitives = { path = "../../core/primitives" } +near-primitives-core = { path = "../../core/primitives-core" } +near-o11y = { path = "../../core/o11y" } +near-store = { path = "../../core/store" } +near-crypto = { path = "../../core/crypto" } \ No newline at end of file diff --git a/tools/mirror/README.md b/tools/mirror/README.md new file mode 100644 index 00000000000..d44b27dd2d5 --- /dev/null +++ b/tools/mirror/README.md @@ -0,0 +1,59 @@ +## Transaction Mirror + 
+This is some code that tries to help with the following: We have some +chain, let's call it the "source chain", producing blocks and chunks +with transactions as usual, and we have another chain, let's call it +the "target chain" that starts from state forked from the source +chain. Usually this would be done by using the `neard view-state +dump-state` command, and using the resulting genesis and records file +as the start of the target chain. What we want is to then periodically +send the transactions appearing in the source chain after the fork +point to the target chain. Ideally, the traffic we see in the target +chain will be very similar to the traffic in the source chain. + +The first approach we might try is to just send the source chain +transactions byte-for-byte unaltered to the target chain. This almost +works, but not quite, because the `block_hash` field in the +transactions will be rejected. This means we have no choice but to +replace the accounts' public keys in the original forked state, so +that we can sign transactions with a valid `block_hash` field. So the +way we'll use this is that we'll generate the forked state from the +source chain using the usual `dump-state` command, and then run: + +``` +$ mirror prepare --records-file-in "~/.near/output/records.json" --records-file-out "~/.near/output/mapped-records.json" --secret-file-out "~/.near/output/mirror-secret.json" +``` + +This command will output a records file where the keys have been +replaced. And then the logic we end up with when running the +transaction generator is something like this: + +``` +loop { + sleep(ONE_SECOND); + source_block = fetch_block(source_chain_view_client, height); + for chunk in source_block: + for tx in chunk: + private_key = map_key(tx.public_key) + block_hash = fetch_head_hash(target_chain_view_client) + new_tx = sign_tx(private_key, tx.actions, block_hash) + send_tx(target_chain_client, new_tx) +} +``` + +So then the question is what does `map_key()` do?
If we don't care +about the security of these accounts in the target chain (for example +if the target chain is just some throwaway test chain that nobody +would have any incentive to mess with), we can pass `--no-secret` and just use the bytes of +the public key directly as the private key. If we do care somewhat +about security, then the `prepare` command writes a generated secret to the +`--secret-file-out` path, and that secret is passed as an argument to `map_key()`. Using +that makes things a little bit more delicate, since if the generated +secret is ever lost, then it will no longer be possible to mirror any +traffic to the target chain. + +Known problems: + +keys in the source chain added with the `promise_batch_action_add_key*` +host functions will not be mapped in the target chain. Maybe a solution +could be to replace those keys manually or something? diff --git a/tools/mirror/src/chain_tracker.rs b/tools/mirror/src/chain_tracker.rs new file mode 100644 index 00000000000..40d7cfd8635 --- /dev/null +++ b/tools/mirror/src/chain_tracker.rs @@ -0,0 +1,430 @@ +use crate::MappedBlock; +use near_crypto::PublicKey; +use near_indexer::StreamerMessage; +use near_indexer_primitives::IndexerTransactionWithOutcome; +use near_primitives::hash::CryptoHash; +use near_primitives::transaction::SignedTransaction; +use near_primitives::types::{AccountId, BlockHeight}; +use near_primitives_core::types::{Nonce, ShardId}; +use std::cmp::Ordering; +use std::collections::hash_map; +use std::collections::HashMap; +use std::collections::{BTreeSet, VecDeque}; +use std::pin::Pin; +use std::time::{Duration, Instant}; + +struct TxSendInfo { + sent_at: Instant, + source_height: BlockHeight, + target_height: BlockHeight, +} + +#[derive(PartialEq, Eq, Debug)] +struct TxId { + hash: CryptoHash, + nonce: Nonce, +} + +impl PartialOrd for TxId { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for TxId { + fn cmp(&self, other: &Self) -> Ordering { + self.nonce.cmp(&other.nonce).then_with(|| 
self.hash.cmp(&other.hash)) + } +} + +// we want a reference to transactions in .queued_blocks that need to have nonces +// set later. To avoid having the struct be self referential we keep this struct +// with enough info to look it up later. +#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) struct TxRef { + height: BlockHeight, + shard_id: ShardId, + tx_idx: usize, +} + +struct TxAwaitingNonceCursor<'a> { + txs: &'a [TxRef], + idx: usize, +} + +impl<'a> TxAwaitingNonceCursor<'a> { + fn new(txs: &'a [TxRef]) -> Self { + Self { txs, idx: 0 } + } +} + +pub(crate) struct TxAwaitingNonceIter<'a> { + queued_blocks: &'a VecDeque, + iter: hash_map::Iter<'a, BlockHeight, Vec>, + cursor: Option>, +} + +impl<'a> TxAwaitingNonceIter<'a> { + fn new( + queued_blocks: &'a VecDeque, + txs_awaiting_nonce: &'a HashMap>, + ) -> Self { + let mut iter = txs_awaiting_nonce.iter(); + let cursor = iter.next().map(|(_height, txs)| TxAwaitingNonceCursor::new(txs)); + Self { queued_blocks, iter, cursor } + } +} + +impl<'a> Iterator for TxAwaitingNonceIter<'a> { + type Item = (&'a TxRef, &'a crate::TxAwaitingNonce); + + fn next(&mut self) -> Option { + match &mut self.cursor { + Some(c) => { + let tx_ref = &c.txs[c.idx]; + c.idx += 1; + if c.idx == c.txs.len() { + self.cursor = + self.iter.next().map(|(_height, txs)| TxAwaitingNonceCursor::new(txs)); + } + let block_idx = self + .queued_blocks + .binary_search_by(|b| b.source_height.cmp(&tx_ref.height)) + .unwrap(); + let block = &self.queued_blocks[block_idx]; + let chunk = block.chunks.iter().find(|c| c.shard_id == tx_ref.shard_id).unwrap(); + match &chunk.txs[tx_ref.tx_idx] { + crate::TargetChainTx::AwaitingNonce(tx) => Some((tx_ref, tx)), + crate::TargetChainTx::Ready(_) => unreachable!(), + } + } + None => None, + } + } +} + +// Keeps the queue of upcoming transactions and provides them in regular intervals via next_batch() +// Also keeps track of txs we've sent so far and looks for them on chain, for metrics/logging purposes. 
+#[derive(Default)] +pub(crate) struct TxTracker { + sent_txs: HashMap, + txs_by_signer: HashMap<(AccountId, PublicKey), BTreeSet>, + queued_blocks: VecDeque, + txs_awaiting_nonce: HashMap>, + pending_access_keys: HashMap<(AccountId, PublicKey), usize>, + height_queued: Option, + send_time: Option>>, + // Config value in the target chain, used to judge how long to wait before sending a new batch of txs + min_block_production_delay: Duration, + // timestamps in the target chain, used to judge how long to wait before sending a new batch of txs + recent_block_timestamps: VecDeque, +} + +impl TxTracker { + pub(crate) fn new(min_block_production_delay: Duration) -> Self { + Self { min_block_production_delay, ..Default::default() } + } + + pub(crate) fn height_queued(&self) -> Option { + self.height_queued + } + + pub(crate) fn num_blocks_queued(&self) -> usize { + self.queued_blocks.len() + } + + pub(crate) fn pending_access_keys_iter<'a>( + &'a self, + ) -> impl Iterator { + self.pending_access_keys.iter().map(|(x, _)| x) + } + + pub(crate) fn tx_awaiting_nonce_iter<'a>(&'a self) -> TxAwaitingNonceIter<'a> { + TxAwaitingNonceIter::new(&self.queued_blocks, &self.txs_awaiting_nonce) + } + + fn pending_access_keys_deref( + &mut self, + source_signer_id: AccountId, + source_public_key: PublicKey, + ) { + match self.pending_access_keys.entry((source_signer_id, source_public_key)) { + hash_map::Entry::Occupied(mut e) => { + let ref_count = e.get_mut(); + if *ref_count == 1 { + e.remove(); + } else { + *ref_count -= 1; + } + } + hash_map::Entry::Vacant(_) => unreachable!(), + } + } + + // We now know of a valid nonce for the transaction referenced by tx_ref. + // Set the nonce and mark the tx as ready to be sent later. 
+ pub(crate) fn set_tx_nonce(&mut self, tx_ref: &TxRef, nonce: Nonce) { + let block_idx = + self.queued_blocks.binary_search_by(|b| b.source_height.cmp(&tx_ref.height)).unwrap(); + let block = &mut self.queued_blocks[block_idx]; + let chunk = block.chunks.iter_mut().find(|c| c.shard_id == tx_ref.shard_id).unwrap(); + let tx = &mut chunk.txs[tx_ref.tx_idx]; + + match self.txs_awaiting_nonce.entry(tx_ref.height) { + hash_map::Entry::Occupied(mut e) => { + let txs = e.get_mut(); + if txs.len() == 1 { + assert!(&txs[0] == tx_ref); + e.remove(); + } else { + let idx = txs.iter().position(|t| t == tx_ref).unwrap(); + txs.swap_remove(idx); + } + } + hash_map::Entry::Vacant(_) => unreachable!(), + } + let (source_signer_id, source_public_key) = match &tx { + crate::TargetChainTx::AwaitingNonce(tx) => { + (tx.source_signer_id.clone(), tx.source_public.clone()) + } + crate::TargetChainTx::Ready(_) => unreachable!(), + }; + + tx.set_nonce(nonce); + self.pending_access_keys_deref(source_signer_id, source_public_key); + } + + pub(crate) fn queue_block(&mut self, block: MappedBlock) { + self.height_queued = Some(block.source_height); + let mut txs_awaiting_nonce = Vec::new(); + for c in block.chunks.iter() { + for (tx_idx, tx) in c.txs.iter().enumerate() { + if let crate::TargetChainTx::AwaitingNonce(tx) = tx { + txs_awaiting_nonce.push(TxRef { + height: block.source_height, + shard_id: c.shard_id, + tx_idx, + }); + *self + .pending_access_keys + .entry((tx.source_signer_id.clone(), tx.source_public.clone())) + .or_default() += 1; + } + } + } + if !txs_awaiting_nonce.is_empty() { + self.txs_awaiting_nonce.insert(block.source_height, txs_awaiting_nonce); + } + self.queued_blocks.push_back(block); + } + + pub(crate) fn next_batch_time(&self) -> Instant { + match &self.send_time { + Some(t) => t.as_ref().deadline().into_std(), + None => Instant::now(), + } + } + + pub(crate) async fn next_batch(&mut self) -> Option { + if let Some(sleep) = &mut self.send_time { + sleep.await; + } + 
let block = self.queued_blocks.pop_front(); + if let Some(block) = &block { + self.txs_awaiting_nonce.remove(&block.source_height); + for chunk in block.chunks.iter() { + for tx in chunk.txs.iter() { + match &tx { + crate::TargetChainTx::AwaitingNonce(tx) => self.pending_access_keys_deref( + tx.source_signer_id.clone(), + tx.source_public.clone(), + ), + crate::TargetChainTx::Ready(_) => {} + } + } + } + } + block + } + + fn remove_tx(&mut self, tx: &IndexerTransactionWithOutcome) { + let k = (tx.transaction.signer_id.clone(), tx.transaction.public_key.clone()); + match self.txs_by_signer.entry(k.clone()) { + hash_map::Entry::Occupied(mut e) => { + let txs = e.get_mut(); + if !txs.remove(&TxId { hash: tx.transaction.hash, nonce: tx.transaction.nonce }) { + tracing::warn!(target: "mirror", "tried to remove nonexistent tx {} from txs_by_signer", tx.transaction.hash); + } + // split off from hash: default() since that's the smallest hash, which will leave us with every tx with nonce + // greater than this one in txs_left. + let txs_left = txs.split_off(&TxId { + hash: CryptoHash::default(), + nonce: tx.transaction.nonce + 1, + }); + if !txs.is_empty() { + tracing::warn!( + target: "mirror", "{} Transactions for {:?} skipped by inclusion of tx with nonce {}: {:?}. 
These will never make it on chain.", + txs.len(), &k, tx.transaction.nonce, &txs + ); + for t in txs.iter() { + if self.sent_txs.remove(&t.hash).is_none() { + tracing::warn!( + target: "mirror", "tx with hash {} that we thought was skipped is not in the set of sent txs", + &t.hash, + ); + } + } + } + *txs = txs_left; + if txs.is_empty() { + self.txs_by_signer.remove(&k); + } + } + hash_map::Entry::Vacant(_) => { + tracing::warn!( + target: "mirror", "recently removed tx {}, but ({:?}, {:?}) not in txs_by_signer", + tx.transaction.hash, tx.transaction.signer_id, tx.transaction.public_key + ); + return; + } + }; + } + + fn record_block_timestamp(&mut self, msg: &StreamerMessage) { + self.recent_block_timestamps.push_back(msg.block.header.timestamp_nanosec); + if self.recent_block_timestamps.len() > 10 { + self.recent_block_timestamps.pop_front(); + } + } + + pub(crate) fn on_target_block(&mut self, msg: &StreamerMessage) { + self.record_block_timestamp(msg); + for s in msg.shards.iter() { + if let Some(c) = &s.chunk { + for tx in c.transactions.iter() { + if let Some(send_info) = self.sent_txs.remove(&tx.transaction.hash) { + let latency = Instant::now() - send_info.sent_at; + tracing::debug!( + target: "mirror", "found my tx {} from source #{} in target #{} {:?} after sending @ target #{}", + tx.transaction.hash, send_info.source_height, msg.block.header.height, latency, send_info.target_height + ); + crate::metrics::TRANSACTIONS_INCLUDED.inc(); + + self.remove_tx(tx); + } + } + } + } + } + + fn on_tx_sent( + &mut self, + tx: &SignedTransaction, + source_height: BlockHeight, + target_height: BlockHeight, + ) { + let hash = tx.get_hash(); + if self.sent_txs.contains_key(&hash) { + tracing::warn!(target: "mirror", "transaction sent twice: {}", &hash); + return; + } + + // TODO: don't keep adding txs if we're not ever finding them on chain, since we'll OOM eventually + // if that happens. 
+ self.sent_txs + .insert(hash, TxSendInfo { sent_at: Instant::now(), source_height, target_height }); + let txs = self + .txs_by_signer + .entry((tx.transaction.signer_id.clone(), tx.transaction.public_key.clone())) + .or_default(); + + if let Some(highest_nonce) = txs.iter().next_back() { + if highest_nonce.nonce > tx.transaction.nonce { + tracing::warn!( + target: "mirror", "transaction sent with out of order nonce: {}: {}. Sent so far: {:?}", + &hash, tx.transaction.nonce, txs + ); + } + } + if !txs.insert(TxId { hash, nonce: tx.transaction.nonce }) { + tracing::warn!(target: "mirror", "inserted tx {} twice into txs_by_signer", &hash); + } + } + + // among the last 10 blocks, what's the second longest time between their timestamps? + // probably there's a better heuristic to use than that but this will do for now. + fn second_longest_recent_block_delay(&self) -> Option { + if self.recent_block_timestamps.len() < 5 { + return None; + } + let mut last = *self.recent_block_timestamps.front().unwrap(); + let mut longest = None; + let mut second_longest = None; + + for timestamp in self.recent_block_timestamps.iter().skip(1) { + let delay = timestamp - last; + + match longest { + Some(l) => match second_longest { + Some(s) => { + if delay > l { + second_longest = longest; + longest = Some(delay); + } else if delay > s { + second_longest = Some(delay); + } + } + None => { + if delay > l { + second_longest = longest; + longest = Some(delay); + } else { + second_longest = Some(delay); + } + } + }, + None => { + longest = Some(delay); + } + } + last = *timestamp; + } + let delay = Duration::from_nanos(second_longest.unwrap()); + if delay > 2 * self.min_block_production_delay { + tracing::warn!( + "Target chain blocks are taking longer than expected to be produced. 
Observing delays \ + of {:?} and {:?} vs min_block_production_delay of {:?} ", + delay, + Duration::from_nanos(longest.unwrap()), + self.min_block_production_delay, + ) + } + Some(delay) + } + + // We just successfully sent some transactions. Remember them so we can see if they really show up on chain. + pub(crate) fn on_txs_sent( + &mut self, + txs: &[SignedTransaction], + source_height: BlockHeight, + target_height: BlockHeight, + ) { + tracing::info!( + target: "mirror", "Sent {} transactions from source #{} with target HEAD @ #{}", + txs.len(), source_height, target_height + ); + for tx in txs.iter() { + self.on_tx_sent(tx, source_height, target_height); + } + + let block_delay = self + .second_longest_recent_block_delay() + .unwrap_or(self.min_block_production_delay + Duration::from_millis(100)); + match &mut self.send_time { + Some(t) => t.as_mut().reset(tokio::time::Instant::now() + block_delay), + None => { + self.send_time = Some(Box::pin(tokio::time::sleep(block_delay))); + } + } + } +} diff --git a/tools/mirror/src/cli.rs b/tools/mirror/src/cli.rs new file mode 100644 index 00000000000..4f6ea0a606c --- /dev/null +++ b/tools/mirror/src/cli.rs @@ -0,0 +1,133 @@ +use anyhow::Context; +use clap::Parser; +use std::cell::Cell; +use std::path::PathBuf; + +#[derive(Parser)] +pub struct MirrorCommand { + #[clap(subcommand)] + subcmd: SubCommand, +} + +#[derive(Parser)] +enum SubCommand { + Prepare(PrepareCmd), + Run(RunCmd), +} + +/// Start two NEAR nodes, one for each chain, and try to mirror +/// transactions from the source chain to the target chain. +#[derive(Parser)] +struct RunCmd { + /// source chain home dir + #[clap(long)] + source_home: PathBuf, + /// target chain home dir + #[clap(long)] + target_home: PathBuf, + /// file containing an optional secret as generated by the + /// `prepare` command. 
Must be provided unless --no-secret is given + #[clap(long)] + secret_file: Option, + /// Equivalent to passing --secret-file where is a + /// config that indicates no secret should be used. If this is + /// given, and --secret-file is also given and points to a config + /// that does contain a secret, the mirror will refuse to start + #[clap(long)] + no_secret: bool, +} + +impl RunCmd { + fn run(self) -> anyhow::Result<()> { + openssl_probe::init_ssl_cert_env_vars(); + let runtime = tokio::runtime::Runtime::new().context("failed to start tokio runtime")?; + + let secret = if let Some(secret_file) = &self.secret_file { + let secret = crate::secret::load(secret_file) + .with_context(|| format!("Failed to load secret from {:?}", secret_file))?; + if secret.is_some() && self.no_secret { + anyhow::bail!( + "--no-secret given with --secret-file indicating that a secret should be used" + ); + } + secret + } else { + if !self.no_secret { + anyhow::bail!("Please give either --secret-file or --no-secret"); + } + None + }; + + let system = new_actix_system(runtime); + system + .block_on(async move { + actix::spawn(crate::run(self.source_home, self.target_home, secret)).await + }) + .unwrap() + } +} + +/// Write a new genesis records file where the public keys have been +/// altered so that this binary can sign transactions when mirroring +/// them from the source chain to the target chain +#[derive(Parser)] +struct PrepareCmd { + /// A genesis records file as output by `neard view-state + /// dump-state --stream` + #[clap(long)] + records_file_in: PathBuf, + /// Path to the new records file with updated public keys + #[clap(long)] + records_file_out: PathBuf, + /// If this is provided, don't use a secret when mapping public + /// keys to new source chain private keys. This means that anyone + /// will be able to sign transactions for the accounts in the + /// target chain corresponding to accounts in the source chain. 
If + /// that is okay, then --no-secret will make the code run slightly + /// faster, and you won't have to take care to not lose the + /// secret. + #[clap(long)] + no_secret: bool, + /// Path to the secret. Note that if you don't pass --no-secret, + /// this secret is required to sign transactions for the accounts + /// in the target chain corresponding to accounts in the source + /// chain. This means that if you lose this secret, you will no + /// longer be able to mirror any traffic. + #[clap(long)] + secret_file_out: PathBuf, +} + +impl PrepareCmd { + fn run(self) -> anyhow::Result<()> { + crate::genesis::map_records( + &self.records_file_in, + &self.records_file_out, + self.no_secret, + &self.secret_file_out, + ) + } +} + +// copied from neard/src/cli.rs +fn new_actix_system(runtime: tokio::runtime::Runtime) -> actix::SystemRunner { + // `with_tokio_rt()` accepts an `Fn()->Runtime`, however we know that this function is called exactly once. + // This makes it safe to move out of the captured variable `runtime`, which is done by a trick + // using a `swap` of `Cell>`s. 
+ let runtime_cell = Cell::new(Some(runtime)); + actix::System::with_tokio_rt(|| { + let r = Cell::new(None); + runtime_cell.swap(&r); + r.into_inner().unwrap() + }) +} + +impl MirrorCommand { + pub fn run(self) -> anyhow::Result<()> { + tracing::warn!(target: "mirror", "the mirror command is not stable, and may be removed or changed arbitrarily at any time"); + + match self.subcmd { + SubCommand::Prepare(r) => r.run(), + SubCommand::Run(r) => r.run(), + } + } +} diff --git a/tools/mirror/src/genesis.rs b/tools/mirror/src/genesis.rs new file mode 100644 index 00000000000..1fd24eb6a3b --- /dev/null +++ b/tools/mirror/src/genesis.rs @@ -0,0 +1,83 @@ +use near_primitives::state_record::StateRecord; +use serde::ser::{SerializeSeq, Serializer}; +use std::fs::File; +use std::io::{BufReader, BufWriter}; +use std::path::Path; + +pub fn map_records>( + records_file_in: P, + records_file_out: P, + no_secret: bool, + secret_file_out: P, +) -> anyhow::Result<()> { + let secret = if !no_secret { + Some(crate::secret::generate(secret_file_out)?) 
+ } else { + crate::secret::write_empty(secret_file_out)?; + None + }; + let reader = BufReader::new(File::open(records_file_in)?); + let records_out = BufWriter::new(File::create(records_file_out)?); + let mut records_ser = serde_json::Serializer::new(records_out); + let mut records_seq = records_ser.serialize_seq(None).unwrap(); + + near_chain_configs::stream_records_from_file(reader, |mut r| { + match &mut r { + StateRecord::AccessKey { account_id, public_key, access_key } => { + let replacement = crate::key_mapping::map_key(&public_key, secret.as_ref()); + let new_record = StateRecord::AccessKey { + account_id: crate::key_mapping::map_account(&account_id, secret.as_ref()), + public_key: replacement.public_key(), + access_key: access_key.clone(), + }; + // TODO: would be nice for stream_records_from_file() to let you return early on error so + // we don't have to unwrap here + records_seq.serialize_element(&new_record).unwrap(); + } + StateRecord::Account { account_id, .. } => { + if account_id.is_implicit() { + *account_id = crate::key_mapping::map_account(&account_id, secret.as_ref()); + } + records_seq.serialize_element(&r).unwrap(); + } + StateRecord::Data { account_id, .. } => { + if account_id.is_implicit() { + *account_id = crate::key_mapping::map_account(&account_id, secret.as_ref()); + } + records_seq.serialize_element(&r).unwrap(); + } + StateRecord::Contract { account_id, .. 
} => { + if account_id.is_implicit() { + *account_id = crate::key_mapping::map_account(&account_id, secret.as_ref()); + } + records_seq.serialize_element(&r).unwrap(); + } + StateRecord::PostponedReceipt(receipt) => { + if receipt.predecessor_id.is_implicit() || receipt.receiver_id.is_implicit() { + receipt.predecessor_id = + crate::key_mapping::map_account(&receipt.predecessor_id, secret.as_ref()); + receipt.receiver_id = + crate::key_mapping::map_account(&receipt.receiver_id, secret.as_ref()); + } + records_seq.serialize_element(&r).unwrap(); + } + StateRecord::ReceivedData { account_id, .. } => { + if account_id.is_implicit() { + *account_id = crate::key_mapping::map_account(&account_id, secret.as_ref()); + } + records_seq.serialize_element(&r).unwrap(); + } + StateRecord::DelayedReceipt(receipt) => { + if receipt.predecessor_id.is_implicit() || receipt.receiver_id.is_implicit() { + receipt.predecessor_id = + crate::key_mapping::map_account(&receipt.predecessor_id, secret.as_ref()); + receipt.receiver_id = + crate::key_mapping::map_account(&receipt.receiver_id, secret.as_ref()); + } + records_seq.serialize_element(&r).unwrap(); + } + }; + })?; + records_seq.end()?; + Ok(()) +} diff --git a/tools/mirror/src/key_mapping.rs b/tools/mirror/src/key_mapping.rs new file mode 100644 index 00000000000..01df970fd8c --- /dev/null +++ b/tools/mirror/src/key_mapping.rs @@ -0,0 +1,111 @@ +use borsh::BorshDeserialize; +use hkdf::Hkdf; +use near_crypto::{ED25519PublicKey, ED25519SecretKey, PublicKey, Secp256K1PublicKey, SecretKey}; +use near_primitives::types::AccountId; +use sha2::Sha256; + +fn ed25519_map_secret( + buf: &mut [u8], + public: &ED25519PublicKey, + secret: Option<&[u8; crate::secret::SECRET_LEN]>, +) { + match secret { + Some(secret) => { + let hk = Hkdf::::new(None, secret); + hk.expand(&public.0, buf).unwrap(); + } + None => { + buf.copy_from_slice(&public.0); + } + }; +} + +fn map_ed25519( + public: &ED25519PublicKey, + secret: Option<&[u8; 
crate::secret::SECRET_LEN]>, +) -> ED25519SecretKey { + let mut buf = [0; ed25519_dalek::KEYPAIR_LENGTH]; + + ed25519_map_secret(&mut buf[..ed25519_dalek::SECRET_KEY_LENGTH], public, secret); + + let secret_key = + ed25519_dalek::SecretKey::from_bytes(&buf[..ed25519_dalek::SECRET_KEY_LENGTH]).unwrap(); + let public_key = ed25519_dalek::PublicKey::from(&secret_key); + + buf[ed25519_dalek::SECRET_KEY_LENGTH..].copy_from_slice(public_key.as_bytes()); + ED25519SecretKey(buf) +} + +fn secp256k1_from_slice(buf: &mut [u8], public: &Secp256K1PublicKey) -> secp256k1::SecretKey { + match secp256k1::SecretKey::from_slice(buf) { + Ok(s) => s, + Err(_) => { + tracing::warn!(target: "mirror", "Something super unlikely occurred! SECP256K1 key mapped from {:?} is too large. Flipping most significant bit.", public); + // If we got an error, it means that either `buf` is all zeros, or that when interpreted as a 256-bit + // int, it is larger than the order of the secp256k1 curve. Since the order of the curve starts with 0xFF, + // in either case flipping the first bit should work, and we can unwrap() below. + buf[0] ^= 0x80; + secp256k1::SecretKey::from_slice(buf).unwrap() + } + } +} + +fn map_secp256k1( + public: &Secp256K1PublicKey, + secret: Option<&[u8; crate::secret::SECRET_LEN]>, +) -> secp256k1::SecretKey { + let mut buf = [0; secp256k1::constants::SECRET_KEY_SIZE]; + + match secret { + Some(secret) => { + let hk = Hkdf::::new(None, secret); + hk.expand(public.as_ref(), &mut buf).unwrap(); + } + None => { + buf.copy_from_slice(&public.as_ref()[..secp256k1::constants::SECRET_KEY_SIZE]); + } + }; + + secp256k1_from_slice(&mut buf, public) +} + +// This maps the public key to a secret key so that we can sign +// transactions on the target chain. If secret is None, then we just +// use the bytes of the public key directly, otherwise we feed the +// public key to a key derivation function. 
+pub(crate) fn map_key( + key: &PublicKey, + secret: Option<&[u8; crate::secret::SECRET_LEN]>, +) -> SecretKey { + match key { + PublicKey::ED25519(k) => SecretKey::ED25519(map_ed25519(k, secret)), + PublicKey::SECP256K1(k) => SecretKey::SECP256K1(map_secp256k1(k, secret)), + } +} + +// returns the public key encoded in this implicit account. panics if it's not +// actually an implicit account +// basically copy pasted from runtime/runtime/src/actions.rs +pub(crate) fn implicit_account_key(account_id: &AccountId) -> PublicKey { + let mut public_key_data = Vec::with_capacity(33); + public_key_data.push(0u8); + public_key_data.extend(hex::decode(account_id.as_ref().as_bytes()).unwrap()); + assert_eq!(public_key_data.len(), 33); + PublicKey::try_from_slice(&public_key_data).unwrap() +} + +// If it's an implicit account, interprets it as an ed25519 public key, maps that and then returns +// the resulting implicit account. Otherwise does nothing. We do this so that transactions creating +// an implicit account by sending money will generate an account that we can control +pub(crate) fn map_account( + account_id: &AccountId, + secret: Option<&[u8; crate::secret::SECRET_LEN]>, +) -> AccountId { + if account_id.is_implicit() { + let public_key = implicit_account_key(account_id); + let mapped_key = map_key(&public_key, secret); + hex::encode(mapped_key.public_key().key_data()).parse().unwrap() + } else { + account_id.clone() + } +} diff --git a/tools/mirror/src/lib.rs b/tools/mirror/src/lib.rs new file mode 100644 index 00000000000..b53f7c200a8 --- /dev/null +++ b/tools/mirror/src/lib.rs @@ -0,0 +1,1046 @@ +use actix::Addr; +use anyhow::Context; +use borsh::{BorshDeserialize, BorshSerialize}; +use near_chain_configs::GenesisValidationMode; +use near_client::{ClientActor, ViewClientActor}; +use near_client_primitives::types::{ + GetBlock, GetBlockError, GetChunk, GetChunkError, GetExecutionOutcome, + GetExecutionOutcomeError, GetExecutionOutcomeResponse, Query, QueryError, 
+}; +use near_crypto::{PublicKey, SecretKey}; +use near_indexer::{Indexer, StreamerMessage}; +use near_network::types::{NetworkClientMessages, NetworkClientResponses}; +use near_o11y::WithSpanContextExt; +use near_primitives::hash::CryptoHash; +use near_primitives::transaction::{ + Action, AddKeyAction, DeleteKeyAction, SignedTransaction, Transaction, +}; +use near_primitives::types::{ + AccountId, BlockHeight, BlockId, BlockReference, Finality, TransactionOrReceiptId, +}; +use near_primitives::views::{ + ExecutionStatusView, QueryRequest, QueryResponseKind, SignedTransactionView, +}; +use near_primitives_core::types::{Nonce, ShardId}; +use nearcore::config::NearConfig; +use rocksdb::DB; +use std::collections::HashSet; +use std::path::Path; +use std::time::{Duration, Instant}; +use strum::IntoEnumIterator; +use tokio::sync::mpsc; + +mod chain_tracker; +pub mod cli; +mod genesis; +mod key_mapping; +mod metrics; +mod secret; + +pub use cli::MirrorCommand; + +#[derive(strum::EnumIter)] +enum DBCol { + Misc, + // This tracks nonces for Access Keys added by AddKey transactions + // or transfers to implicit accounts (not present in the genesis state). + // For a given (account ID, public key), if we're preparing a transaction + // and there's no entry in the DB, then the key was present in the genesis + // state. Otherwise, we map tx nonces according to the values in this column. 
+ Nonces, +} + +impl DBCol { + fn name(&self) -> &'static str { + match self { + Self::Misc => "miscellaneous", + Self::Nonces => "nonces", + } + } +} + +// returns bytes that serve as the key corresponding to this pair in the Nonces column +fn nonce_col_key(account_id: &AccountId, public_key: &PublicKey) -> Vec { + (account_id.clone(), public_key.clone()).try_to_vec().unwrap() +} + +#[derive(Clone, BorshDeserialize, BorshSerialize, Debug, PartialEq, Eq, PartialOrd, Hash)] +struct TxIds { + tx_hash: CryptoHash, + signer_id: AccountId, + receiver_id: AccountId, +} + +// For a given AddKey Action, records the starting nonces of the +// resulting Access Keys. We need this because when an AddKey receipt +// is processed, the nonce field of the AddKey action is actually +// ignored, and it's set to block_height*1000000, so to generate +// transactions with valid nonces, we need to map valid source chain +// nonces to valid target chain nonces. +#[derive(BorshDeserialize, BorshSerialize, Debug, Default)] +struct NonceDiff { + source_start: Option, + target_start: Option, + pending_source_txs: HashSet, +} + +#[derive(thiserror::Error, Debug)] +pub(crate) enum MapNonceError { + #[error("Source chain access key not yet on chain")] + SourceKeyNotOnChain, + #[error("Target chain access key not yet on chain")] + TargetKeyNotOnChain, + #[error("Nonce arithmetic overflow: {0} + {1}")] + AddOverflow(Nonce, Nonce), + #[error("Nonce arithmetic overflow: {0} - {1}")] + SubOverflow(Nonce, Nonce), +} + +impl NonceDiff { + fn set_source(&mut self, nonce: Nonce) { + self.source_start = Some(nonce); + self.pending_source_txs.clear(); + } + + fn map(&self, nonce: Nonce) -> Result { + let source_start = self.source_start.ok_or(MapNonceError::SourceKeyNotOnChain)?; + let target_start = self.target_start.ok_or(MapNonceError::TargetKeyNotOnChain)?; + if target_start > source_start { + let diff = target_start - source_start; + nonce.checked_add(diff).ok_or_else(|| 
MapNonceError::AddOverflow(nonce, diff)) + } else { + let diff = source_start - target_start; + nonce.checked_sub(diff).ok_or_else(|| MapNonceError::SubOverflow(nonce, diff)) + } + } + + fn known(&self) -> bool { + self.source_start.is_some() && self.target_start.is_some() + } +} + +struct TxMirror { + target_stream: mpsc::Receiver, + source_view_client: Addr, + source_client: Addr, + target_view_client: Addr, + target_client: Addr, + db: DB, + target_genesis_height: BlockHeight, + target_min_block_production_delay: Duration, + tracked_shards: Vec, + secret: Option<[u8; crate::secret::SECRET_LEN]>, + next_source_height: Option, +} + +fn open_db>(home: P, config: &NearConfig) -> anyhow::Result { + let db_path = + near_store::NodeStorage::opener(home.as_ref(), &config.config.store).path().join("mirror"); + let mut options = rocksdb::Options::default(); + options.create_missing_column_families(true); + options.create_if_missing(true); + let cf_descriptors = DBCol::iter() + .map(|col| rocksdb::ColumnFamilyDescriptor::new(col.name(), options.clone())) + .collect::>(); + Ok(DB::open_cf_descriptors(&options, db_path, cf_descriptors)?) +} + +// a transaction that's almost prepared, except that we don't yet know +// what nonce to use because the public key was added in an AddKey +// action that we haven't seen on chain yet. The tx field is complete +// except for the nonce field. +#[derive(Debug)] +struct TxAwaitingNonce { + source_public: PublicKey, + source_signer_id: AccountId, + target_private: SecretKey, + tx: Transaction, +} + +#[derive(Debug)] +enum TargetChainTx { + Ready(SignedTransaction), + AwaitingNonce(TxAwaitingNonce), +} + +impl TargetChainTx { + // For an AwaitingNonce(_), set the nonce and sign the transaction, changing self into Ready(_). 
+ // must not be called if self is Ready(_) + fn set_nonce(&mut self, nonce: Nonce) { + match self { + Self::AwaitingNonce(t) => { + t.tx.nonce = nonce; + let tx = SignedTransaction::new( + t.target_private.sign(&t.tx.get_hash_and_size().0.as_ref()), + t.tx.clone(), + ); + tracing::debug!( + target: "mirror", "prepared a transaction for ({:?}, {:?}) that was previously waiting for the access key to appear on chain", + &tx.transaction.signer_id, &tx.transaction.public_key + ); + *self = Self::Ready(tx); + } + Self::Ready(_) => unreachable!(), + } + } +} + +#[derive(Debug)] +struct MappedChunk { + txs: Vec, + shard_id: ShardId, +} + +#[derive(Debug)] +struct MappedBlock { + source_height: BlockHeight, + chunks: Vec, +} + +async fn account_exists( + view_client: &Addr, + account_id: &AccountId, + prev_block: &CryptoHash, +) -> anyhow::Result { + match view_client + .send( + Query::new( + BlockReference::BlockId(BlockId::Hash(prev_block.clone())), + QueryRequest::ViewAccount { account_id: account_id.clone() }, + ) + .with_span_context(), + ) + .await? + { + Ok(res) => match res.kind { + QueryResponseKind::ViewAccount(_) => Ok(true), + other => { + panic!("Received unexpected QueryResponse after Querying Account: {:?}", other); + } + }, + Err(e) => match &e { + QueryError::UnknownAccount { .. } => Ok(false), + _ => Err(e.into()), + }, + } +} + +async fn fetch_access_key_nonce( + view_client: &Addr, + account_id: &AccountId, + public_key: &PublicKey, + block_hash: Option<&CryptoHash>, +) -> anyhow::Result> { + let block_ref = match block_hash { + Some(h) => BlockReference::BlockId(BlockId::Hash(h.clone())), + None => BlockReference::Finality(Finality::None), + }; + match view_client + .send( + Query::new( + block_ref, + QueryRequest::ViewAccessKey { + account_id: account_id.clone(), + public_key: public_key.clone(), + }, + ) + .with_span_context(), + ) + .await? 
+ { + Ok(res) => match res.kind { + QueryResponseKind::AccessKey(access_key) => Ok(Some(access_key.nonce)), + other => { + panic!("Received unexpected QueryResponse after Querying Access Key: {:?}", other); + } + }, + Err(_) => Ok(None), + } +} + +#[derive(Clone, Debug)] +enum TxOutcome { + Success(CryptoHash), + Pending, + Failure, +} + +async fn fetch_tx_outcome( + view_client: &Addr, + transaction_hash: CryptoHash, + signer_id: &AccountId, + receiver_id: &AccountId, +) -> anyhow::Result { + let receipt_id = match view_client + .send( + GetExecutionOutcome { + id: TransactionOrReceiptId::Transaction { + transaction_hash, + sender_id: signer_id.clone(), + }, + } + .with_span_context(), + ) + .await + .unwrap() + { + Ok(GetExecutionOutcomeResponse { outcome_proof, .. }) => { + match outcome_proof.outcome.status { + ExecutionStatusView::SuccessReceiptId(id) => id, + ExecutionStatusView::SuccessValue(_) => unreachable!(), + ExecutionStatusView::Failure(_) | ExecutionStatusView::Unknown => { + return Ok(TxOutcome::Failure) + } + } + } + Err( + GetExecutionOutcomeError::NotConfirmed { .. } + | GetExecutionOutcomeError::UnknownBlock { .. }, + ) => return Ok(TxOutcome::Pending), + Err(e) => { + return Err(e) + .with_context(|| format!("failed fetching outcome for tx {}", transaction_hash)) + } + }; + match view_client + .send( + GetExecutionOutcome { + id: TransactionOrReceiptId::Receipt { + receipt_id, + receiver_id: receiver_id.clone(), + }, + } + .with_span_context(), + ) + .await + .unwrap() + { + Ok(GetExecutionOutcomeResponse { outcome_proof, .. 
}) => { + match outcome_proof.outcome.status { + ExecutionStatusView::SuccessReceiptId(_) | ExecutionStatusView::SuccessValue(_) => { + // the view client code actually modifies the outcome's block_hash field to be the + // next block with a new chunk in the relevant shard, so go backwards one block, + // since that's what we'll want to give in the query for AccessKeys + let block = view_client + .send( + GetBlock(BlockReference::BlockId(BlockId::Hash( + outcome_proof.block_hash, + ))) + .with_span_context(), + ) + .await + .unwrap() + .with_context(|| { + format!("failed fetching block {}", &outcome_proof.block_hash) + })?; + Ok(TxOutcome::Success(block.header.prev_hash)) + } + ExecutionStatusView::Failure(_) | ExecutionStatusView::Unknown => { + Ok(TxOutcome::Failure) + } + } + } + Err( + GetExecutionOutcomeError::NotConfirmed { .. } + | GetExecutionOutcomeError::UnknownBlock { .. } + | GetExecutionOutcomeError::UnknownTransactionOrReceipt { .. }, + ) => Ok(TxOutcome::Pending), + Err(e) => { + Err(e).with_context(|| format!("failed fetching outcome for receipt {}", &receipt_id)) + } + } +} + +async fn block_hash_to_height( + view_client: &Addr, + hash: &CryptoHash, +) -> anyhow::Result { + Ok(view_client + .send(GetBlock(BlockReference::BlockId(BlockId::Hash(hash.clone()))).with_span_context()) + .await + .unwrap()? 
+ .header + .height) +} + +impl TxMirror { + fn new>( + source_home: P, + target_home: P, + secret: Option<[u8; crate::secret::SECRET_LEN]>, + ) -> anyhow::Result { + let target_config = + nearcore::config::load_config(target_home.as_ref(), GenesisValidationMode::UnsafeFast) + .with_context(|| { + format!("Error loading target config from {:?}", target_home.as_ref()) + })?; + let db = + open_db(target_home.as_ref(), &target_config).context("failed to open mirror DB")?; + let source_config = + nearcore::config::load_config(source_home.as_ref(), GenesisValidationMode::UnsafeFast) + .with_context(|| { + format!("Error loading source config from {:?}", source_home.as_ref()) + })?; + + let source_node = nearcore::start_with_config(source_home.as_ref(), source_config.clone()) + .context("failed to start source chain NEAR node")?; + + let target_indexer = Indexer::new(near_indexer::IndexerConfig { + home_dir: target_home.as_ref().to_path_buf(), + sync_mode: near_indexer::SyncModeEnum::LatestSynced, + await_for_node_synced: near_indexer::AwaitForNodeSyncedEnum::WaitForFullSync, + }) + .context("failed to start target chain indexer")?; + let (target_view_client, target_client) = target_indexer.client_actors(); + let target_stream = target_indexer.streamer(); + + Ok(Self { + source_view_client: source_node.view_client, + source_client: source_node.client, + target_client, + target_view_client, + target_stream, + db, + target_genesis_height: target_config.genesis.config.genesis_height, + target_min_block_production_delay: target_config + .client_config + .min_block_production_delay, + tracked_shards: target_config.config.tracked_shards.clone(), + secret, + next_source_height: None, + }) + } + + fn get_next_source_height(&mut self) -> anyhow::Result { + if let Some(height) = self.next_source_height { + return Ok(height); + } + let height = + self.db.get_cf(self.db.cf_handle(DBCol::Misc.name()).unwrap(), "next_source_height")?; + match height { + Some(h) => { + let height = 
BlockHeight::try_from_slice(&h).unwrap(); + self.next_source_height = Some(height); + Ok(height) + } + None => Ok(self.target_genesis_height), + } + } + + async fn send_transactions( + &mut self, + block: &MappedBlock, + ) -> anyhow::Result> { + let mut sent = vec![]; + for chunk in block.chunks.iter() { + for tx in chunk.txs.iter() { + match tx { + TargetChainTx::Ready(tx) => { + match self + .target_client + .send( + NetworkClientMessages::Transaction { + transaction: tx.clone(), + is_forwarded: false, + check_only: false, + } + .with_span_context(), + ) + .await? + { + NetworkClientResponses::RequestRouted => { + crate::metrics::TRANSACTIONS_SENT.with_label_values(&["ok"]).inc(); + sent.push(tx.clone()); + } + NetworkClientResponses::InvalidTx(e) => { + // TODO: here if we're getting an error because the tx was already included, it is possible + // that some other instance of this code ran and made progress already. For now we can assume + // only one instance of this code will run, but this is the place to detect if that's not the case. + tracing::error!( + target: "mirror", "Tried to send an invalid tx from source #{} shard {}: {:?}", + block.source_height, chunk.shard_id, e + ); + crate::metrics::TRANSACTIONS_SENT + .with_label_values(&["invalid"]) + .inc(); + } + r => { + tracing::error!( + target: "mirror", "Unexpected response sending tx from source #{} shard {}: {:?}. 
The transaction was not sent", + block.source_height, chunk.shard_id, r + ); + crate::metrics::TRANSACTIONS_SENT + .with_label_values(&["internal_error"]) + .inc(); + } + } + } + TargetChainTx::AwaitingNonce(tx) => { + // TODO: here we should just save this transaction for later and send it when it's known + tracing::warn!(target: "mirror", "skipped sending transaction with signer {} because valid target chain nonce not known", &tx.source_signer_id) + } + } + } + } + Ok(sent) + } + + fn read_nonce_diff( + &self, + account_id: &AccountId, + public_key: &PublicKey, + ) -> anyhow::Result> { + let db_key = nonce_col_key(account_id, public_key); + // TODO: cache this? + Ok(self + .db + .get_cf(self.db.cf_handle(DBCol::Nonces.name()).unwrap(), &db_key)? + .map(|v| NonceDiff::try_from_slice(&v).unwrap())) + } + + fn put_nonce_diff( + &self, + account_id: &AccountId, + public_key: &PublicKey, + diff: &NonceDiff, + ) -> anyhow::Result<()> { + tracing::debug!(target: "mirror", "storing {:?} in DB for ({:?}, {:?})", &diff, account_id, public_key); + let db_key = nonce_col_key(account_id, public_key); + self.db.put_cf( + self.db.cf_handle(DBCol::Nonces.name()).unwrap(), + &db_key, + &diff.try_to_vec().unwrap(), + )?; + Ok(()) + } + + // If the access key was present in the genesis records, just + // return the same nonce. Otherwise, we need to change the + // nonce. So check if we already know what the difference in + // nonces is, and if not, try to fetch that info and store it. + // `source_signer_id` and `target_signer_id` are the same unless + // it's an implicit account + async fn map_nonce( + &self, + source_signer_id: &AccountId, + target_signer_id: &AccountId, + source_public: &PublicKey, + target_public: &PublicKey, + nonce: Nonce, + ) -> anyhow::Result> { + let mut diff = match self.read_nonce_diff(source_signer_id, source_public)? 
{ + Some(m) => m, + // If it's not stored in the database, it's an access key that was present in the genesis + // records, so we don't need to do anything to the nonce. + None => return Ok(Ok(nonce)), + }; + if diff.known() { + return Ok(diff.map(nonce)); + } + + self.update_nonces( + source_signer_id, + target_signer_id, + source_public, + target_public, + &mut diff, + ) + .await?; + Ok(diff.map(nonce)) + } + + async fn update_nonces( + &self, + source_signer_id: &AccountId, + target_signer_id: &AccountId, + source_public: &PublicKey, + target_public: &PublicKey, + diff: &mut NonceDiff, + ) -> anyhow::Result<()> { + let mut rewrite = false; + if diff.source_start.is_none() { + self.update_source_nonce(source_signer_id, source_public, diff).await?; + rewrite |= diff.source_start.is_some(); + } + if diff.target_start.is_none() { + diff.target_start = fetch_access_key_nonce( + &self.target_view_client, + target_signer_id, + target_public, + None, + ) + .await?; + rewrite |= diff.target_start.is_some(); + } + + if rewrite { + self.put_nonce_diff(source_signer_id, source_public, diff)?; + } + Ok(()) + } + + async fn update_source_nonce( + &self, + account_id: &AccountId, + public_key: &PublicKey, + diff: &mut NonceDiff, + ) -> anyhow::Result<()> { + let mut block_height = 0; + let mut block_hash = CryptoHash::default(); + let mut failed_txs = Vec::new(); + + // first find the earliest block hash where the access key should exist + for tx in diff.pending_source_txs.iter() { + match fetch_tx_outcome( + &self.source_view_client, + tx.tx_hash.clone(), + &tx.signer_id, + &tx.receiver_id, + ) + .await? 
+ { + TxOutcome::Success(hash) => { + let height = + block_hash_to_height(&self.source_view_client, &hash).await.with_context( + || format!("failed fetching block height of block {}", &hash), + )?; + if &block_hash == &CryptoHash::default() || block_height > height { + block_height = height; + block_hash = hash; + } + } + TxOutcome::Failure => { + failed_txs.push(tx.clone()); + } + TxOutcome::Pending => {} + } + } + if &block_hash == &CryptoHash::default() { + // no need to do this if block_hash is set because set_source() below will clear it + for tx in failed_txs.iter() { + diff.pending_source_txs.remove(tx); + } + return Ok(()); + } + let nonce = fetch_access_key_nonce( + &self.source_view_client, + account_id, + public_key, + Some(&block_hash), + ) + .await? + .ok_or_else(|| { + anyhow::anyhow!( + "expected access key to exist for {}, {} after finding successful receipt in {}", + &account_id, + &public_key, + &block_hash + ) + })?; + diff.set_source(nonce); + Ok(()) + } + + // we have a situation where nonces need to be mapped (AddKey actions + // or implicit account transfers). So store the initial nonce data in the DB. + async fn store_source_nonce( + &self, + tx: &SignedTransactionView, + public_key: &PublicKey, + ) -> anyhow::Result<()> { + // TODO: probably better to use a merge operator here. Not urgent, though. 
+ let mut diff = self.read_nonce_diff(&tx.receiver_id, &public_key)?.unwrap_or_default(); + if diff.source_start.is_some() { + return Ok(()); + } + diff.pending_source_txs.insert(TxIds { + tx_hash: tx.hash.clone(), + signer_id: tx.signer_id.clone(), + receiver_id: tx.receiver_id.clone(), + }); + self.update_source_nonce(&tx.receiver_id, &public_key, &mut diff).await?; + self.put_nonce_diff(&tx.receiver_id, &public_key, &diff) + } + + async fn map_actions( + &self, + tx: &SignedTransactionView, + prev_block: &CryptoHash, + ) -> anyhow::Result> { + let mut actions = Vec::new(); + + for a in tx.actions.iter() { + // this try_from() won't fail since the ActionView was constructed from the Action + let action = Action::try_from(a.clone()).unwrap(); + + match &action { + Action::AddKey(add_key) => { + self.store_source_nonce(tx, &add_key.public_key).await?; + + let replacement = + crate::key_mapping::map_key(&add_key.public_key, self.secret.as_ref()); + + actions.push(Action::AddKey(AddKeyAction { + public_key: replacement.public_key(), + access_key: add_key.access_key.clone(), + })); + } + Action::DeleteKey(delete_key) => { + let replacement = + crate::key_mapping::map_key(&delete_key.public_key, self.secret.as_ref()); + let public_key = replacement.public_key(); + + actions.push(Action::DeleteKey(DeleteKeyAction { public_key })); + } + Action::Transfer(_) => { + if tx.receiver_id.is_implicit() + && !account_exists(&self.source_view_client, &tx.receiver_id, prev_block) + .await + .with_context(|| { + format!("failed checking existence for account {}", &tx.receiver_id) + })? 
+ { + let public_key = crate::key_mapping::implicit_account_key(&tx.receiver_id); + self.store_source_nonce(tx, &public_key).await?; + } + actions.push(action); + } + // We don't want to mess with the set of validators in the target chain + Action::Stake(_) => {} + _ => actions.push(action), + }; + } + Ok(actions) + } + + // fetch the source chain block at `source_height`, and prepare a + // set of transactions that should be valid in the target chain + // from it. + async fn fetch_txs( + &self, + source_height: BlockHeight, + ref_hash: CryptoHash, + ) -> anyhow::Result> { + let prev_hash = match self + .source_view_client + .send( + GetBlock(BlockReference::BlockId(BlockId::Height(source_height))) + .with_span_context(), + ) + .await + .unwrap() + { + Ok(b) => b.header.prev_hash, + Err(GetBlockError::UnknownBlock { .. }) => return Ok(None), + Err(e) => return Err(e.into()), + }; + let mut chunks = Vec::new(); + for shard_id in self.tracked_shards.iter() { + let mut txs = Vec::new(); + + let chunk = match self + .source_view_client + .send(GetChunk::Height(source_height, *shard_id).with_span_context()) + .await? + { + Ok(c) => c, + Err(e) => match e { + GetChunkError::UnknownBlock { .. } => return Ok(None), + GetChunkError::UnknownChunk { .. } => { + tracing::error!( + "Can't fetch source chain shard {} chunk at height {}. Are we tracking all shards?", + shard_id, source_height + ); + continue; + } + _ => return Err(e.into()), + }, + }; + if chunk.header.height_included != source_height { + continue; + } + + let mut num_not_ready = 0; + for t in chunk.transactions { + let actions = self.map_actions(&t, &prev_hash).await?; + if actions.is_empty() { + // If this is a tx containing only stake actions, skip it. 
+ continue; + } + let mapped_key = crate::key_mapping::map_key(&t.public_key, self.secret.as_ref()); + let public_key = mapped_key.public_key(); + + let target_signer_id = + crate::key_mapping::map_account(&t.signer_id, self.secret.as_ref()); + match self + .map_nonce(&t.signer_id, &target_signer_id, &t.public_key, &public_key, t.nonce) + .await? + { + Ok(nonce) => { + let mut tx = Transaction::new( + target_signer_id, + public_key, + crate::key_mapping::map_account(&t.receiver_id, self.secret.as_ref()), + nonce, + ref_hash.clone(), + ); + tx.actions = actions; + let tx = SignedTransaction::new( + mapped_key.sign(&tx.get_hash_and_size().0.as_ref()), + tx, + ); + txs.push(TargetChainTx::Ready(tx)); + } + Err(e) => match e { + MapNonceError::AddOverflow(..) + | MapNonceError::SubOverflow(..) + | MapNonceError::SourceKeyNotOnChain => { + tracing::error!(target: "mirror", "error mapping nonce for ({:?}, {:?}): {:?}", &t.signer_id, &public_key, e); + continue; + } + MapNonceError::TargetKeyNotOnChain => { + let mut tx = Transaction::new( + crate::key_mapping::map_account(&t.signer_id, self.secret.as_ref()), + public_key, + crate::key_mapping::map_account( + &t.receiver_id, + self.secret.as_ref(), + ), + t.nonce, + ref_hash.clone(), + ); + tx.actions = actions; + txs.push(TargetChainTx::AwaitingNonce(TxAwaitingNonce { + tx, + source_public: t.public_key.clone(), + source_signer_id: t.signer_id.clone(), + target_private: mapped_key, + })); + num_not_ready += 1; + } + }, + }; + } + if num_not_ready == 0 { + tracing::debug!( + target: "mirror", "prepared {} transacations for source chain #{} shard {}", + txs.len(), source_height, shard_id + ); + } else { + tracing::debug!( + target: "mirror", "prepared {} transacations for source chain #{} shard {} {} of which are \ + still waiting for the corresponding access keys to make it on chain", + txs.len(), source_height, shard_id, num_not_ready, + ); + } + chunks.push(MappedChunk { txs, shard_id: *shard_id }); + } + 
Ok(Some(MappedBlock { source_height, chunks })) + } + + // Up to a certain capacity, prepare and queue up batches of + // transactions that we want to send to the target chain. + async fn queue_txs( + &mut self, + tracker: &mut crate::chain_tracker::TxTracker, + ref_hash: CryptoHash, + check_send_time: bool, + ) -> anyhow::Result<()> { + if tracker.num_blocks_queued() > 100 { + return Ok(()); + } + + let next_batch_time = tracker.next_batch_time(); + let source_head = + self.get_source_height().await.context("can't fetch source chain HEAD")?; + let start_height = match tracker.height_queued() { + Some(h) => h + 1, + None => self.get_next_source_height()?, + }; + + for height in start_height..=source_head { + if let Some(b) = self + .fetch_txs(height, ref_hash) + .await + .with_context(|| format!("Can't fetch source #{} transactions", height))? + { + tracker.queue_block(b); + if tracker.num_blocks_queued() > 100 { + return Ok(()); + } + }; + + if check_send_time + && tracker.num_blocks_queued() > 0 + && Instant::now() > next_batch_time - Duration::from_millis(20) + { + return Ok(()); + } + } + Ok(()) + } + + fn set_next_source_height(&mut self, height: BlockHeight) -> anyhow::Result<()> { + self.next_source_height = Some(height); + // TODO: we should instead save something like the + // (block_height, shard_id, idx_in_chunk) of the last + // transaction sent. Currently we set next_source_height after + // sending all of the transactions in that chunk, so if we get + // SIGTERM or something in the middle of sending a batch of + // txs, we'll send some that we already sent next time we + // start. Not a giant problem but kind of unclean. + self.db.put_cf( + self.db.cf_handle(DBCol::Misc.name()).unwrap(), + "next_source_height", + height.try_to_vec().unwrap(), + )?; + Ok(()) + } + + // Go through any upcoming batches of transactions that we haven't + // been able to set a valid nonce for yet, and see if we can now + // do that. 
+ async fn set_nonces( + &self, + tracker: &mut crate::chain_tracker::TxTracker, + ) -> anyhow::Result<()> { + let next_batch_time = tracker.next_batch_time(); + let mut txs_ready = Vec::new(); + let mut keys_mapped = HashSet::new(); + + for (source_signer_id, source_public_key) in tracker.pending_access_keys_iter() { + let mut diff = self.read_nonce_diff(source_signer_id, source_public_key)?.unwrap(); + let target_signer_id = + crate::key_mapping::map_account(source_signer_id, self.secret.as_ref()); + let target_public_key = + crate::key_mapping::map_key(source_public_key, self.secret.as_ref()).public_key(); + self.update_nonces( + &source_signer_id, + &target_signer_id, + &source_public_key, + &target_public_key, + &mut diff, + ) + .await?; + if diff.known() { + keys_mapped.insert((source_signer_id.clone(), source_public_key.clone())); + } + } + for (tx_ref, tx) in tracker.tx_awaiting_nonce_iter() { + if keys_mapped.contains(&(tx.source_signer_id.clone(), tx.source_public.clone())) { + let nonce = self + .map_nonce( + &tx.source_signer_id, + &tx.tx.signer_id, + &tx.source_public, + &tx.tx.public_key, + tx.tx.nonce, + ) + .await? + .unwrap(); + txs_ready.push((tx_ref.clone(), nonce)); + } + + if Instant::now() > next_batch_time - Duration::from_millis(20) { + break; + } + } + for (tx_ref, nonce) in txs_ready { + tracker.set_tx_nonce(&tx_ref, nonce); + } + Ok(()) + } + + async fn main_loop( + &mut self, + mut tracker: crate::chain_tracker::TxTracker, + mut target_height: BlockHeight, + mut target_head: CryptoHash, + ) -> anyhow::Result<()> { + loop { + tokio::select! { + // time to send a batch of transactions + mapped_block = tracker.next_batch(), if tracker.num_blocks_queued() > 0 => { + let mapped_block = mapped_block.unwrap(); + let sent = self.send_transactions(&mapped_block).await?; + tracker.on_txs_sent(&sent, mapped_block.source_height, target_height); + + // now we have one second left until we need to send more transactions. 
In the + // meantime, we might as well prepare some more batches of transactions. + // TODO: continue in best effort fashion on error + self.set_next_source_height(mapped_block.source_height+1)?; + self.queue_txs(&mut tracker, target_head, true).await?; + } + msg = self.target_stream.recv() => { + let msg = msg.unwrap(); + tracker.on_target_block(&msg); + self.set_nonces(&mut tracker).await?; + target_head = msg.block.header.hash; + target_height = msg.block.header.height; + } + // If we don't have any upcoming sets of transactions to send already built, we probably fell behind in the source + // chain and can't fetch the transactions. Check if we have them now here. + _ = tokio::time::sleep(Duration::from_millis(200)), if tracker.num_blocks_queued() == 0 => { + self.queue_txs(&mut tracker, target_head, true).await?; + } + }; + } + } + + async fn get_source_height(&self) -> Option { + self.source_client + .send( + near_client::Status { is_health_check: false, detailed: false }.with_span_context(), + ) + .await + .unwrap() + .ok() + .map(|s| s.sync_info.latest_block_height) + } + + // wait until HEAD moves. We don't really need it to be fully synced. 
+ async fn wait_source_ready(&self) { + let mut first_height = None; + loop { + if let Some(head) = self.get_source_height().await { + match first_height { + Some(h) => { + if h != head { + return; + } + } + None => { + first_height = Some(head); + } + } + } + + tokio::time::sleep(Duration::from_millis(500)).await; + } + } + + async fn wait_target_synced(&mut self) -> (BlockHeight, CryptoHash) { + let msg = self.target_stream.recv().await.unwrap(); + (msg.block.header.height, msg.block.header.hash) + } + + async fn run(mut self) -> anyhow::Result<()> { + let mut tracker = + crate::chain_tracker::TxTracker::new(self.target_min_block_production_delay); + self.wait_source_ready().await; + let (target_height, target_head) = self.wait_target_synced().await; + + self.queue_txs(&mut tracker, target_head, false).await?; + + self.main_loop(tracker, target_height, target_head).await + } +} + +async fn run>( + source_home: P, + target_home: P, + secret: Option<[u8; crate::secret::SECRET_LEN]>, +) -> anyhow::Result<()> { + let m = TxMirror::new(source_home, target_home, secret)?; + m.run().await +} diff --git a/tools/mirror/src/metrics.rs b/tools/mirror/src/metrics.rs new file mode 100644 index 00000000000..c26c563cfaa --- /dev/null +++ b/tools/mirror/src/metrics.rs @@ -0,0 +1,21 @@ +use near_o11y::metrics::{ + try_create_int_counter, try_create_int_counter_vec, IntCounter, IntCounterVec, +}; +use once_cell::sync::Lazy; + +pub static TRANSACTIONS_SENT: Lazy = Lazy::new(|| { + try_create_int_counter_vec( + "near_mirror_transactions_sent", + "Total number of transactions sent", + &["status"], + ) + .unwrap() +}); + +pub static TRANSACTIONS_INCLUDED: Lazy = Lazy::new(|| { + try_create_int_counter( + "near_mirror_transactions_included", + "Total number of transactions sent that made it on-chain", + ) + .unwrap() +}); diff --git a/tools/mirror/src/secret.rs b/tools/mirror/src/secret.rs new file mode 100644 index 00000000000..6107a831aa8 --- /dev/null +++ 
b/tools/mirror/src/secret.rs @@ -0,0 +1,85 @@ +use rand_core::{OsRng, RngCore}; +use serde::{Deserialize, Serialize}; +use std::fs::File; +use std::io::Write; +use std::path::Path; +use std::str::FromStr; + +pub const SECRET_LEN: usize = 64; +struct KeyMapSecret([u8; SECRET_LEN]); + +#[derive(Serialize, Deserialize)] +struct MirrorSecretConfig { + pub key_map_secret: Option, +} + +impl serde::Serialize for KeyMapSecret { + fn serialize( + &self, + serializer: S, + ) -> Result<::Ok, ::Error> + where + S: serde::Serializer, + { + let data = bs58::encode(&self.0[..]).into_string(); + serializer.serialize_str(&data) + } +} + +impl<'de> serde::Deserialize<'de> for KeyMapSecret { + fn deserialize(deserializer: D) -> Result>::Error> + where + D: serde::Deserializer<'de>, + { + let s = ::deserialize(deserializer)?; + Self::from_str(&s).map_err(|err| serde::de::Error::custom(format!("{:?}", err))) + } +} + +#[derive(thiserror::Error, Debug)] +pub(crate) enum ParseSecretError { + #[error("Base58 decode failure: `{1}`")] + BS58(#[source] bs58::decode::Error, String), + #[error("invalid decoded length (expected: 64, got: {0}: input: `{1}`)")] + BadLength(usize, String), +} + +impl FromStr for KeyMapSecret { + type Err = ParseSecretError; + + fn from_str(s: &str) -> Result { + let mut array = [0; SECRET_LEN]; + let length = bs58::decode(s) + .into(&mut array[..]) + .map_err(|err| Self::Err::BS58(err, s.to_owned()))?; + if length != SECRET_LEN { + return Err(Self::Err::BadLength(length, s.to_owned())); + } + Ok(Self(array)) + } +} + +pub(crate) fn generate>(secret_file_out: P) -> anyhow::Result<[u8; SECRET_LEN]> { + let mut secret = [0; SECRET_LEN]; + let mut out = File::create(secret_file_out)?; + + OsRng.fill_bytes(&mut secret); + let config = MirrorSecretConfig { key_map_secret: Some(KeyMapSecret(secret)) }; + let str = serde_json::to_string_pretty(&config)?; + out.write_all(str.as_bytes())?; + Ok(secret) +} + +pub(crate) fn write_empty>(secret_file_out: P) -> 
anyhow::Result<()> { + let mut out = File::create(secret_file_out)?; + let config = MirrorSecretConfig { key_map_secret: None }; + let str = serde_json::to_string_pretty(&config)?; + out.write_all(str.as_bytes())?; + Ok(()) +} + +pub fn load>(secret_file: P) -> anyhow::Result> { + let s = std::fs::read_to_string(secret_file)?; + let config: MirrorSecretConfig = serde_json::from_str(&s)?; + Ok(config.key_map_secret.map(|s| s.0)) +} From 1f4192c5b2885f7b109e6485ee6ccbefa2591ef0 Mon Sep 17 00:00:00 2001 From: Alex Kladov Date: Fri, 21 Oct 2022 18:07:52 +0100 Subject: [PATCH 007/103] refactor: simplify error handling in main (#7897) `anyhow` is the type to return from `main`, we don't get any value here from preserving well-typed errors, and are creating more work down the line to add all future error variants: *surely* we can fail due to more than these two errors, right? --- neard/src/cli.rs | 17 +++-------------- neard/src/main.rs | 6 +++--- 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/neard/src/cli.rs b/neard/src/cli.rs index 3b6ff113433..9bd806c227c 100644 --- a/neard/src/cli.rs +++ b/neard/src/cli.rs @@ -36,7 +36,7 @@ pub(super) struct NeardCmd { } impl NeardCmd { - pub(super) fn parse_and_run() -> Result<(), RunError> { + pub(super) fn parse_and_run() -> anyhow::Result<()> { let neard_cmd = Self::parse(); // Enable logging of the current thread. 
@@ -102,16 +102,6 @@ impl NeardCmd { } } -#[derive(thiserror::Error, Debug)] -pub(crate) enum RunError { - #[error("invalid logging directives provided")] - EnvFilter(#[source] BuildEnvFilterError), - #[error("could not install a rayon thread pool")] - RayonInstall(#[source] rayon::ThreadPoolBuildError), - #[error(transparent)] - Other(#[from] anyhow::Error), -} - #[derive(Parser)] pub(super) struct StateViewerCommand { /// By default state viewer opens rocks DB in the read only mode, which allows it to run @@ -681,9 +671,8 @@ impl VerifyProofSubCommand { } } -fn make_env_filter(verbose: Option<&str>) -> Result { - let env_filter = - EnvFilterBuilder::from_env().verbose(verbose).finish().map_err(RunError::EnvFilter)?; +fn make_env_filter(verbose: Option<&str>) -> Result { + let env_filter = EnvFilterBuilder::from_env().verbose(verbose).finish()?; // Sandbox node can log to sandbox logging target via sandbox_debug_log host function. // This is hidden by default so we enable it for sandbox node. let env_filter = if cfg!(feature = "sandbox") { diff --git a/neard/src/main.rs b/neard/src/main.rs index 3663d668daa..00a3abdd011 100644 --- a/neard/src/main.rs +++ b/neard/src/main.rs @@ -2,7 +2,7 @@ mod cli; mod log_config_watcher; use self::cli::NeardCmd; -use crate::cli::RunError; +use anyhow::Context; use near_primitives::version::{Version, PROTOCOL_VERSION}; use near_store::metadata::DB_VERSION; use nearcore::get_default_home; @@ -41,7 +41,7 @@ static ALLOC: near_rust_allocator_proxy::ProxyAllocator Result<(), RunError> { +fn main() -> anyhow::Result<()> { if env::var("RUST_BACKTRACE").is_err() { // Enable backtraces on panics by default. 
env::set_var("RUST_BACKTRACE", "1"); @@ -50,7 +50,7 @@ fn main() -> Result<(), RunError> { rayon::ThreadPoolBuilder::new() .stack_size(8 * 1024 * 1024) .build_global() - .map_err(RunError::RayonInstall)?; + .context("failed to create the threadpool")?; #[cfg(feature = "memory_stats")] ALLOC.set_report_usage_interval(512 << 20).enable_stack_trace(true); From 2b1a8bb9d8410667eb4cbef290dd5541686231a7 Mon Sep 17 00:00:00 2001 From: nujabes403 Date: Mon, 24 Oct 2022 00:34:18 +0900 Subject: [PATCH 008/103] doc: fix typos (#7904) * doc: fix typo Acton -> Action * doc: fix typo falied -> failed * doc: fix typo recieve -> receive * doc: fix typo infomation -> information * Update tools/delay-detector/README.md Co-authored-by: Michal Nazarewicz --- core/primitives/src/errors.rs | 2 +- pytest/lib/mocknet.py | 2 +- tools/delay-detector/README.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/primitives/src/errors.rs b/core/primitives/src/errors.rs index f55b3b2eb94..8354409b1f7 100644 --- a/core/primitives/src/errors.rs +++ b/core/primitives/src/errors.rs @@ -320,7 +320,7 @@ impl Display for ActionsValidationError { impl std::error::Error for ActionsValidationError {} -/// An error happened during Acton execution +/// An error happened during Action execution #[derive( BorshSerialize, BorshDeserialize, Debug, Clone, PartialEq, Eq, Deserialize, Serialize, RpcError, )] diff --git a/pytest/lib/mocknet.py b/pytest/lib/mocknet.py index 6caf853b994..713f8531a90 100644 --- a/pytest/lib/mocknet.py +++ b/pytest/lib/mocknet.py @@ -320,7 +320,7 @@ def send_transaction(node, tx, tx_hash, account_id, timeout=120): elif 'does not exist' in error_data: missing_count += 1 logger.warning( - f'transaction {tx_hash} falied to be recieved by the node, checking again.' + f'transaction {tx_hash} failed to be received by the node, checking again.' 
) if missing_count < 20: time.sleep(5) diff --git a/tools/delay-detector/README.md b/tools/delay-detector/README.md index f0822a156d2..c84d05b8770 100644 --- a/tools/delay-detector/README.md +++ b/tools/delay-detector/README.md @@ -25,6 +25,6 @@ More advanced example: d.snapshot("part2") part3(); d.shapshot("part3") - // d_ goes out of scope and prints the total time infomation and time between each 'snapshot' call. + // d goes out of scope and prints the total time information and time between each 'snapshot' call. } ``` From 445ce053d4a0a22a0a33d0c300fc43b7fa5ae0c6 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Mon, 24 Oct 2022 11:04:23 +0100 Subject: [PATCH 009/103] crypto: Remove unused randomness module (#7907) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The module has been introduced in commit cbcf678ea92c: ‘Cryptographic code for randomness beacon’ and then never used. Get rid of it. --- core/crypto/src/lib.rs | 1 - core/crypto/src/randomness.rs | 455 ---------------------------------- core/crypto/src/vrf.rs | 23 +- 3 files changed, 12 insertions(+), 467 deletions(-) delete mode 100644 core/crypto/src/randomness.rs diff --git a/core/crypto/src/lib.rs b/core/crypto/src/lib.rs index 6de50e95e48..8509f082739 100644 --- a/core/crypto/src/lib.rs +++ b/core/crypto/src/lib.rs @@ -16,7 +16,6 @@ mod util; mod errors; pub mod key_conversion; mod key_file; -pub mod randomness; mod signature; mod signer; mod test_utils; diff --git a/core/crypto/src/randomness.rs b/core/crypto/src/randomness.rs deleted file mode 100644 index 12e62a7b60a..00000000000 --- a/core/crypto/src/randomness.rs +++ /dev/null @@ -1,455 +0,0 @@ -use crate::hash::Hash512; -use crate::util::{unpack, vmul2, Packable, Point, Scalar}; -use arrayref::{array_ref, array_refs}; -use c2_chacha::guts::ChaCha; -use curve25519_dalek::constants::{ - RISTRETTO_BASEPOINT_POINT as G, RISTRETTO_BASEPOINT_TABLE as GT, -}; -use curve25519_dalek::traits::{Identity, 
VartimeMultiscalarMul}; -use std::borrow::Borrow; -use std::iter::once; -use std::ops::{AddAssign, Deref, DerefMut, Sub}; - -pub use crate::vrf::{PublicKey, SecretKey}; - -#[derive(Clone)] -struct ChaChaScalars(ChaCha, Option<[u8; 32]>); - -impl ChaChaScalars { - fn from_hash(hash: [u8; 32]) -> Self { - ChaChaScalars(ChaCha::new(&hash, &[0; 8]), None) - } -} - -impl Iterator for ChaChaScalars { - type Item = Scalar; - - fn next(&mut self) -> Option { - Some(Scalar::from_bytes_mod_order(match self.1 { - Some(s) => { - self.1 = None; - s - } - None => { - let mut block = [0; 64]; - self.0.refill(10, &mut block); - let (b1, b2) = array_refs!(&block, 32, 32); - self.1 = Some(*b2); - *b1 - } - })) - } - - fn size_hint(&self) -> (usize, Option) { - (usize::max_value(), None) - } -} - -struct ExpandIter(Box<[T]>); - -fn expand(els: E) -> ExpandIter -where - E::Item: Borrow, - for<'a> &'a T: Sub, -{ - let mut res = Vec::with_capacity(els.size_hint().0); - for vv in els { - let mut v = *vv.borrow(); - for v2 in res.iter_mut() { - let dif = &v - v2; - *v2 = v; - v = dif; - } - res.push(v); - } - ExpandIter(res.into_boxed_slice()) -} - -impl Iterator for ExpandIter -where - for<'a> T: AddAssign<&'a T>, -{ - type Item = T; - - fn next(&mut self) -> Option { - Some(if self.0.is_empty() { - T::default() - } else { - let mut v = self.0[self.0.len() - 1]; - let r = 0..self.0.len() - 1; - for v2 in self.0[r].iter_mut().rev() { - v += &*v2; - *v2 = v; - } - v - }) - } - - fn size_hint(&self) -> (usize, Option) { - (usize::max_value(), None) - } -} - -#[derive(Copy, Clone, PartialEq, Eq, Debug)] -pub struct Params { - n: usize, - k: usize, -} - -impl Params { - pub fn new(n: usize, k: usize) -> Self { - if !Self::is_valid(n, k) { - panic!("Invalid parameters"); - } - Params { n, k } - } - - pub const fn is_valid(n: usize, k: usize) -> bool { - Self::is_valid_n(n) & (k <= n) - } - - const fn is_valid_n(n: usize) -> bool { - (n <= u32::max_value() as usize) & (n <= (usize::max_value() 
- 64) / 32) - } - - pub const fn n(&self) -> usize { - self.n - } - - pub const fn k(&self) -> usize { - self.k - } -} - -#[derive(Clone, PartialEq, Eq)] -pub struct PublicShares(pub Box<[u8]>); -#[derive(Clone, PartialEq, Eq)] -pub struct SecretShares(Box<[Scalar]>); -#[derive(Clone, PartialEq, Eq)] -pub struct ValidatedPublicShares(Box<[Point]>); -value_type!(pub, EncryptedShare, 32, "encrypted share"); -#[derive(Copy, Clone, PartialEq, Eq)] -pub struct DecryptedShare(Scalar); -value_type!(pub, DecryptionFailureProof, 96, "decryption failure proof"); -#[derive(Clone, PartialEq, Eq)] -pub struct RandomEpoch(Box<[Point]>); -#[derive(Copy, Clone, PartialEq, Eq)] -pub struct RandomEpochSecret(Scalar); -#[derive(Copy, Clone)] -pub struct RandomRound([u8; 32], Point); -value_type!(pub, RandomShare, 96, "random share"); -#[derive(Copy, Clone, PartialEq, Eq)] -pub struct ValidatedRandomShare(Point); -value_type!(pub, RandomValue, 32, "random value"); - -impl PublicShares { - pub const fn length(Params { k, .. 
}: Params) -> usize { - k * 32 + 64 - } - - pub fn validate(&self, key: &PublicKey) -> Option { - let k = (self.0.len() - 64) / 32; - assert!(self.0.len() >= 64 && self.0.len() % 32 == 0 && Params::is_valid_n(k)); - let mut res = Vec::with_capacity(k); - for i in 0..k { - res.push(unpack(array_ref!(self.0, 32 * i, 32))?); - } - let comm = array_ref!(self.0, 32 * k, 32); - let r = unpack(array_ref!(self.0, 32 * k + 32, 32))?; - if Point::vartime_multiscalar_mul( - ChaChaScalars::from_hash(hash!(key, &self.0[..32 * k + 32])).take(k).chain(once(r)), - res.iter().chain(once(&G)), - ) - .pack() - != *comm - { - return None; - } - Some(ValidatedPublicShares(res.into_boxed_slice())) - } -} - -fn xor32(a: [u8; 32], b: [u8; 32]) -> [u8; 32] { - let mut res = [0; 32]; - for i in 0..32 { - res[i] = a[i] ^ b[i]; - } - res -} - -impl SecretShares { - pub fn encrypt(&self, index: usize, key: &PublicKey) -> EncryptedShare { - let s = &self.0[index]; - EncryptedShare(xor32(hash!(s * &key.1), s.pack())) - } -} - -impl ValidatedPublicShares { - fn get_element(&self, index: usize) -> Point { - if index < self.0.len() { - self.0[index] - } else { - expand(self.0.iter()).nth(index - self.0.len()).unwrap() - } - } - - pub fn try_decrypt( - &self, - index: usize, - share: &EncryptedShare, - key: &SecretKey, - ) -> Result { - let p = self.get_element(index); - let ss = (&key.0 * &p).pack(); - if let Some(s) = unpack(&xor32(hash!(&ss), share.0)) { - if &s * > == p { - return Ok(DecryptedShare(s)); - } - } - let k = prs!(key.0, p); - let c = hash_s!(&(key.1).0, p, &ss, &k * >, &k * &p); - Err(DecryptionFailureProof((ss, k - c * key.0, c).pack())) - } - - pub fn is_valid( - &self, - index: usize, - share: &EncryptedShare, - key: &PublicKey, - proof: &DecryptionFailureProof, - ) -> bool { - let p = self.get_element(index); - if let Some(s) = Scalar::unpack(&xor32(hash!(&proof.0[..32]), share.0)) { - if &s * > == p { - return false; - } - } - let (ss, r, c) = 
unwrap_or_return_false!(unpack(&proof.0)); - hash_s!(&key.0, p, ss, vmul2(r, &G, c, &key.1), vmul2(r, &p, c, &ss)) == c - } -} - -fn i2s(i: usize) -> Scalar { - Scalar::from(i as u64) -} - -impl RandomEpoch { - pub fn from_shares( - Params { n, k }: Params, - mut shares: impl Iterator, - ) -> Self { - let mut res = Vec::with_capacity(n); - match shares.next() { - None => { - res.resize_with(n, Point::identity); - } - Some(s) => { - assert!(s.0.len() == k); - res.extend_from_slice(s.0.deref()); - for s in shares { - assert!(s.0.len() == k); - for i in 0..k { - res[i] += s.0[i]; - } - } - res.extend(expand::(res.iter()).take(n - k)); - } - } - RandomEpoch(res.into_boxed_slice()) - } - - pub fn compute_share( - &self, - round: &RandomRound, - index: usize, - secret: &RandomEpochSecret, - ) -> RandomShare { - let ss = (&secret.0 * &round.1).pack(); - let k = prs!(secret.0, &round.0); - let c = hash_s!(self.0[index], &ss, &k * >, &k * &round.1); - RandomShare((ss, k - c * secret.0, c).pack()) - } - - pub fn validate_share( - &self, - round: &RandomRound, - index: usize, - share: &RandomShare, - ) -> Option { - let key = self.0[index]; - let (ss, r, c) = unpack(&share.0)?; - let uss = unpack(&ss)?; - if hash_s!(key, &ss, vmul2(r, &G, c, &key), vmul2(r, &round.1, c, &uss)) != c { - return None; - } - Some(ValidatedRandomShare(uss)) - } - - pub fn finalize(shares: &[(usize, ValidatedRandomShare)]) -> RandomValue { - debug_assert!(shares.windows(2).all(|w| w[0].0 < w[1].0)); - let mut coeff = Vec::with_capacity(shares.len()); - for (i, (xi, _)) in shares.iter().enumerate() { - let mut v = if i & 1 != 0 { -Scalar::one() } else { Scalar::one() }; - for (xj, _) in &shares[..i] { - v *= i2s(xi - xj); - } - for (xj, _) in &shares[i + 1..] 
{ - v *= i2s(xj - xi); - } - coeff.push(v); - } - Scalar::batch_invert(coeff.deref_mut()); - for (i, v) in coeff.iter_mut().enumerate() { - for (x, _) in shares[..i].iter().chain(&shares[i + 1..]) { - *v *= i2s(x + 1); - } - } - RandomValue(Point::vartime_multiscalar_mul(coeff, shares.iter().map(|p| (p.1).0)).pack()) - } -} - -impl RandomEpochSecret { - pub fn from_shares(mut shares: impl Iterator) -> Self { - RandomEpochSecret(match shares.next() { - None => Scalar::zero(), - Some(DecryptedShare(mut s)) => { - for DecryptedShare(s2) in shares { - s += s2; - } - s - } - }) - } -} - -impl RandomRound { - pub fn new(epoch_id: &[u8; 32], index: u32) -> Self { - // We don't really need to compute Elligator twice, but curve25519-dalek doesn't provide a function which does it only once. - let p = Point::from_hash(hash_chain!(Hash512::default(), epoch_id, &index.to_le_bytes())); - RandomRound(p.pack(), p) - } -} - -impl From<&[u8]> for PublicShares { - fn from(value: &[u8]) -> Self { - PublicShares(value.into()) - } -} - -impl AsRef<[u8]> for PublicShares { - fn as_ref(&self) -> &[u8] { - self.0.as_ref() - } -} - -impl AsMut<[u8]> for PublicShares { - fn as_mut(&mut self) -> &mut [u8] { - self.0.as_mut() - } -} - -impl TryFrom<&str> for PublicShares { - type Error = (); - - fn try_from(value: &str) -> Result { - match bs58::decode(value).into_vec() { - Ok(v) => Ok(PublicShares(v.into_boxed_slice())), - Err(_) => Err(()), - } - } -} - -common_conversions!(PublicShares, "public shares"); - -eq!(RandomRound, |a, b| &a.0 == &b.0); - -#[cfg(test)] -mod tests { - use super::*; - - use rand::rngs::OsRng; - use rand::seq::index; - use rand::RngCore; - - fn generate_shares(Params { n, k }: Params, key: &PublicKey) -> (PublicShares, SecretShares) { - let mut public = Vec::with_capacity(k * 32 + 64); - let mut secret = Vec::with_capacity(n); - for _ in 0..k { - let s = Scalar::random(&mut OsRng); - public.extend_from_slice(&(&s * >).pack()); - secret.push(s); - } - let mut r = 
Scalar::random(&mut OsRng); - public.extend_from_slice(&(&r * >).pack()); - secret - .iter() - .zip(ChaChaScalars::from_hash(hash!(key, &public))) - .for_each(|(s, c)| r -= c * s); - public.extend_from_slice(&r.pack()); - secret.extend(expand::(secret.iter()).take(n - k)); - debug_assert!(public.len() == PublicShares::length(Params { n, k }) && secret.len() == n); - (PublicShares(public.into_boxed_slice()), SecretShares(secret.into_boxed_slice())) - } - - #[test] - fn test_u32_max_value_fits_usize() { - // This is used in Params::is_valid_n(). - assert_eq!(u32::max_value() as usize as u32, u32::max_value()); - } - - #[test] - fn test_operation() { - let params = Params::new(13, 8); - let gens: usize = 10; - let mut gen_keys = Vec::new(); - for _ in 0..gens { - gen_keys.push(SecretKey::random()); - } - let mut recv_keys = Vec::new(); - for _ in 0..params.n { - recv_keys.push(SecretKey::random()); - } - let mut public_shares = Vec::new(); - let mut decrypted_shares = Vec::new(); - for i in 0..gens { - let (ps, ss) = generate_shares(params, &gen_keys[i].public_key()); - let vs = ps.validate(&gen_keys[i].public_key()).unwrap(); - for j in 0..params.n { - let es = ss.encrypt(j, &recv_keys[j].public_key()); - let ds = vs.try_decrypt(j, &es, &recv_keys[j]).unwrap(); - decrypted_shares.push(ds); - } - public_shares.push(vs); - } - let epoch = RandomEpoch::from_shares(params, public_shares.into_iter()); - let mut epoch_secrets = Vec::new(); - for i in 0..params.n { - let mut dss = Vec::new(); - for j in 0..gens { - dss.push(decrypted_shares[j * params.n + i]); - } - epoch_secrets.push(RandomEpochSecret::from_shares(dss.into_iter())); - } - let mut epoch_id = [0; 32]; - OsRng.fill_bytes(&mut epoch_id); - let random_round = RandomRound::new(&epoch_id, OsRng.next_u32()); - let mut random_shares = Vec::new(); - for i in 0..params.n { - let rs = epoch.compute_share(&random_round, i, &epoch_secrets[i]); - let vrs = epoch.validate_share(&random_round, i, &rs).unwrap(); - 
random_shares.push(vrs); - } - let produce_value = || { - let mut selected_shares = Vec::new(); - for i in index::sample(&mut OsRng, params.n, params.k).iter() { - selected_shares.push((i, random_shares[i])); - } - selected_shares.sort_unstable_by(|a, b| a.0.cmp(&b.0)); - RandomEpoch::finalize(selected_shares.as_slice()) - }; - let v = produce_value(); - for _ in 0..10 { - assert_eq!(v, produce_value()); - } - } -} diff --git a/core/crypto/src/vrf.rs b/core/crypto/src/vrf.rs index 56211e385be..7f79bddcb70 100644 --- a/core/crypto/src/vrf.rs +++ b/core/crypto/src/vrf.rs @@ -3,14 +3,13 @@ use bs58; use curve25519_dalek::constants::{ RISTRETTO_BASEPOINT_POINT as G, RISTRETTO_BASEPOINT_TABLE as GT, }; -use rand::rngs::OsRng; use std::borrow::Borrow; use subtle::{ConditionallySelectable, ConstantTimeEq}; #[derive(Clone)] pub struct PublicKey(pub(crate) [u8; 32], pub(crate) Point); #[derive(Clone)] -pub struct SecretKey(pub(crate) Scalar, pub(crate) PublicKey); +pub struct SecretKey(Scalar, PublicKey); value_type!(pub, Value, 32, "value"); value_type!(pub, Proof, 64, "proof"); @@ -57,10 +56,6 @@ impl SecretKey { Some(Self::from_scalar(unpack(bytes)?)) } - pub fn random() -> Self { - Self::from_scalar(Scalar::random(&mut OsRng)) - } - pub fn public_key(&self) -> &PublicKey { &self.1 } @@ -111,12 +106,18 @@ traits!(SecretKey, 32, |s| s.0.as_bytes(), "secret key"); #[cfg(test)] mod tests { use super::*; + + use rand::rngs::OsRng; use serde::{Deserialize, Serialize}; use serde_json::{from_str, to_string}; + fn random_secret_key() -> SecretKey { + SecretKey::from_scalar(Scalar::random(&mut OsRng)) + } + #[test] fn test_conversion() { - let sk = SecretKey::random(); + let sk = random_secret_key(); let sk2 = SecretKey::from_bytes(&sk.clone().into()).unwrap(); assert_eq!(sk, sk2); let pk = sk.public_key(); @@ -128,7 +129,7 @@ mod tests { #[test] fn test_verify() { - let sk = SecretKey::random(); + let sk = random_secret_key(); let (val, proof) = 
sk.compute_vrf_with_proof(b"Test"); let val2 = sk.compute_vrf(b"Test"); assert_eq!(val, val2); @@ -138,8 +139,8 @@ mod tests { #[test] fn test_different_keys() { - let sk = SecretKey::random(); - let sk2 = SecretKey::random(); + let sk = random_secret_key(); + let sk2 = random_secret_key(); assert_ne!(sk, sk2); assert_ne!(Into::<[u8; 32]>::into(sk.clone()), Into::<[u8; 32]>::into(sk2.clone())); let pk = sk.public_key(); @@ -161,7 +162,7 @@ mod tests { #[test] fn test_serialize() { - let sk = SecretKey::random(); + let sk = random_secret_key(); let sk2 = round_trip(&sk); assert_eq!(sk, sk2); let (val, proof) = sk.compute_vrf_with_proof(b"Test"); From 2f69ec3a1bc8fd31ea15e919fd2ae31f5db150f7 Mon Sep 17 00:00:00 2001 From: al002 Date: Mon, 24 Oct 2022 19:31:34 +0800 Subject: [PATCH 010/103] doc: update logo (#7905) Update near logo in README.md https://github.com/near/nearcore/issues/7875 --- docs/images/logo.svg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/images/logo.svg b/docs/images/logo.svg index e3babd2836b..18d1cf21c87 100644 --- a/docs/images/logo.svg +++ b/docs/images/logo.svg @@ -1 +1 @@ -near_logo \ No newline at end of file + \ No newline at end of file From 3cd74cde18a6bc57f8ebfa8c85394601cf9b059c Mon Sep 17 00:00:00 2001 From: mm-near <91919554+mm-near@users.noreply.github.com> Date: Mon, 24 Oct 2022 14:15:02 +0200 Subject: [PATCH 011/103] [Debug UI] Fixed bug in network html when syncing (#7906) List of peers wasn't printed if we were in sync mode (especially during header/state sync) --- chain/jsonrpc/res/network_info.html | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/chain/jsonrpc/res/network_info.html b/chain/jsonrpc/res/network_info.html index 7721ff7aed5..a367737f88b 100644 --- a/chain/jsonrpc/res/network_info.html +++ b/chain/jsonrpc/res/network_info.html @@ -128,11 +128,18 @@ type: "GET", url: "/debug/api/epoch_info", success: data => { + let epoch_found = false; data.status_response.EpochInfo.forEach(element 
=> { if (element.epoch_id == epoch_id) { + epoch_found = true; producers_callback(element.block_producers, element.chunk_only_producers); } }); + // This can happen if we're in sync mode - in such case, still print the list of peers, + // but don't show producers. + if (epoch_found == false) { + producers_callback([], []); + } }, dataType: "json", error: function (errMsg, textStatus, errorThrown) { From 90c943b8dc428cb47bff728e851ff6f45786a94c Mon Sep 17 00:00:00 2001 From: pompon0 Date: Mon, 24 Oct 2022 15:39:58 +0200 Subject: [PATCH 012/103] removed messages of unimplemented EpochSync (#7911) EpochSync was never implemented, there is just a bunch of stubs left here and there. Removing them. --- chain/client/src/client_actor.rs | 8 - chain/client/src/test_utils.rs | 197 +++++++++--------- chain/client/src/view_client.rs | 8 - chain/network/src/network_protocol/borsh.rs | 12 +- .../src/network_protocol/borsh_conv.rs | 26 +-- chain/network/src/network_protocol/mod.rs | 5 - .../src/network_protocol/network.proto | 29 +-- .../proto_conv/peer_message.rs | 55 ----- chain/network/src/network_protocol/tests.rs | 7 - .../src/peer_manager/peer_manager_actor.rs | 23 -- chain/network/src/testonly/fake_client.rs | 15 -- chain/network/src/types.rs | 68 +----- tools/chainsync-loadtest/src/network.rs | 4 - 13 files changed, 127 insertions(+), 330 deletions(-) diff --git a/chain/client/src/client_actor.rs b/chain/client/src/client_actor.rs index 424f7d02287..11aad2cb667 100644 --- a/chain/client/src/client_actor.rs +++ b/chain/client/src/client_actor.rs @@ -567,14 +567,6 @@ impl ClientActor { NetworkClientResponses::NoResponse } - NetworkClientMessages::EpochSyncResponse(_peer_id, _response) => { - // TODO #3488 - NetworkClientResponses::NoResponse - } - NetworkClientMessages::EpochSyncFinalizationResponse(_peer_id, _response) => { - // TODO #3488 - NetworkClientResponses::NoResponse - } NetworkClientMessages::PartialEncodedChunkRequest(part_request_msg, route_back) => { let _ = 
self .client diff --git a/chain/client/src/test_utils.rs b/chain/client/src/test_utils.rs index e37c5977dc2..68dbb243159 100644 --- a/chain/client/src/test_utils.rs +++ b/chain/client/src/test_utils.rs @@ -627,7 +627,8 @@ pub fn setup_mock_all_validators( // Note: this `.wait` will block until all `ClientActors` are created. let connectors1 = connectors1.wait(); let mut guard = network_mock1.write().unwrap(); - let (resp, perform_default) = guard.deref_mut()(connectors1.as_slice(), account_id.clone(), &msg); + let (resp, perform_default) = + guard.deref_mut()(connectors1.as_slice(), account_id.clone(), &msg); drop(guard); if perform_default { @@ -643,24 +644,25 @@ pub fn setup_mock_all_validators( .enumerate() .map(|(i, peer_info)| ConnectedPeerInfo { full_peer_info: FullPeerInfo { - peer_info: peer_info.clone(), - chain_info: PeerChainInfoV2 { - genesis_id: GenesisId { - chain_id: "unittest".to_string(), - hash: Default::default(), + peer_info: peer_info.clone(), + chain_info: PeerChainInfoV2 { + genesis_id: GenesisId { + chain_id: "unittest".to_string(), + hash: Default::default(), + }, + height: last_height2[i], + tracked_shards: vec![], + archival: true, }, - height: last_height2[i], - tracked_shards: vec![], - archival: true, + partial_edge_info: PartialEdgeInfo::default(), }, - partial_edge_info: PartialEdgeInfo::default(), - }, received_bytes_per_sec: 0, sent_bytes_per_sec: 0, last_time_peer_requested: near_network::time::Instant::now(), last_time_received_message: near_network::time::Instant::now(), connection_established_time: near_network::time::Instant::now(), - peer_type: PeerType::Outbound, }) + peer_type: PeerType::Outbound, + }) .collect(); let peers2 = peers.iter().map(|it| it.full_peer_info.clone()).collect(); let info = NetworkInfo { @@ -673,7 +675,8 @@ pub fn setup_mock_all_validators( known_producers: vec![], tier1_accounts: vec![], }; - client_addr.do_send(NetworkClientMessages::NetworkInfo(info).with_span_context()); + client_addr + 
.do_send(NetworkClientMessages::NetworkInfo(info).with_span_context()); } match msg.as_network_requests_ref() { @@ -685,7 +688,14 @@ pub fn setup_mock_all_validators( } for (client, _) in connectors1 { - client.do_send(NetworkClientMessages::Block( block.clone(), PeerInfo::random().id, false, ).with_span_context()); + client.do_send( + NetworkClientMessages::Block( + block.clone(), + PeerInfo::random().id, + false, + ) + .with_span_context(), + ); } let mut last_height1 = last_height1.write().unwrap(); @@ -701,7 +711,11 @@ pub fn setup_mock_all_validators( } NetworkRequests::PartialEncodedChunkRequest { target, request, .. } => { let create_msg = || { - NetworkClientMessages::PartialEncodedChunkRequest(request.clone(), my_address).with_span_context() + NetworkClientMessages::PartialEncodedChunkRequest( + request.clone(), + my_address, + ) + .with_span_context() }; send_chunks( connectors1, @@ -713,7 +727,11 @@ pub fn setup_mock_all_validators( } NetworkRequests::PartialEncodedChunkResponse { route_back, response } => { let create_msg = || { - NetworkClientMessages::PartialEncodedChunkResponse(response.clone(), Clock::instant()).with_span_context() + NetworkClientMessages::PartialEncodedChunkResponse( + response.clone(), + Clock::instant(), + ) + .with_span_context() }; send_chunks( connectors1, @@ -728,7 +746,10 @@ pub fn setup_mock_all_validators( partial_encoded_chunk, } => { let create_msg = || { - NetworkClientMessages::PartialEncodedChunk(partial_encoded_chunk.clone().into()).with_span_context() + NetworkClientMessages::PartialEncodedChunk( + partial_encoded_chunk.clone().into(), + ) + .with_span_context() }; send_chunks( connectors1, @@ -740,7 +761,8 @@ pub fn setup_mock_all_validators( } NetworkRequests::PartialEncodedChunkForward { account_id, forward } => { let create_msg = || { - NetworkClientMessages::PartialEncodedChunkForward(forward.clone()).with_span_context() + NetworkClientMessages::PartialEncodedChunkForward(forward.clone()) + .with_span_context() 
}; send_chunks( connectors1, @@ -758,64 +780,20 @@ pub fn setup_mock_all_validators( actix::spawn( connectors1[i] .1 - .send(NetworkViewClientMessages::BlockRequest(*hash).with_span_context()) + .send( + NetworkViewClientMessages::BlockRequest(*hash) + .with_span_context(), + ) .then(move |response| { let response = response.unwrap(); match response { NetworkViewClientResponses::Block(block) => { - me.do_send(NetworkClientMessages::Block(*block, peer_id, true).with_span_context()); - } - NetworkViewClientResponses::NoResponse => {} - _ => assert!(false), - } - future::ready(()) - }), - ); - } - } - } - NetworkRequests::EpochSyncRequest { epoch_id, peer_id } => { - for (i, peer_info) in key_pairs.iter().enumerate() { - let peer_id = peer_id.clone(); - if peer_info.id == peer_id { - let me = connectors1[my_ord].0.clone(); - actix::spawn( - connectors1[i] - .1 - .send(NetworkViewClientMessages::EpochSyncRequest { - epoch_id: epoch_id.clone(), - }.with_span_context()) - .then(move |response| { - let response = response.unwrap(); - match response { - NetworkViewClientResponses::EpochSyncResponse(response) => { - me.do_send(NetworkClientMessages::EpochSyncResponse(peer_id, response).with_span_context()); - } - NetworkViewClientResponses::NoResponse => {} - _ => assert!(false), - } - future::ready(()) - }), - ); - } - } - } - NetworkRequests::EpochSyncFinalizationRequest { epoch_id, peer_id } => { - for (i, peer_info) in key_pairs.iter().enumerate() { - let peer_id = peer_id.clone(); - if peer_info.id == peer_id { - let me = connectors1[my_ord].0.clone(); - actix::spawn( - connectors1[i] - .1 - .send(NetworkViewClientMessages::EpochSyncFinalizationRequest { - epoch_id: epoch_id.clone(), - }.with_span_context()) - .then(move |response| { - let response = response.unwrap(); - match response { - NetworkViewClientResponses::EpochSyncFinalizationResponse(response) => { - me.do_send(NetworkClientMessages::EpochSyncFinalizationResponse(peer_id, response).with_span_context()); + 
me.do_send( + NetworkClientMessages::Block( + *block, peer_id, true, + ) + .with_span_context(), + ); } NetworkViewClientResponses::NoResponse => {} _ => assert!(false), @@ -834,16 +812,24 @@ pub fn setup_mock_all_validators( actix::spawn( connectors1[i] .1 - .send(NetworkViewClientMessages::BlockHeadersRequest( - hashes.clone(), - ).with_span_context()) + .send( + NetworkViewClientMessages::BlockHeadersRequest( + hashes.clone(), + ) + .with_span_context(), + ) .then(move |response| { let response = response.unwrap(); match response { NetworkViewClientResponses::BlockHeaders( headers, ) => { - me.do_send(NetworkClientMessages::BlockHeaders(headers, peer_id).with_span_context()); + me.do_send( + NetworkClientMessages::BlockHeaders( + headers, peer_id, + ) + .with_span_context(), + ); } NetworkViewClientResponses::NoResponse => {} _ => assert!(false), @@ -869,17 +855,25 @@ pub fn setup_mock_all_validators( actix::spawn( connectors1[i] .1 - .send(NetworkViewClientMessages::StateRequestHeader { - shard_id: *shard_id, - sync_hash: *sync_hash, - }.with_span_context()) + .send( + NetworkViewClientMessages::StateRequestHeader { + shard_id: *shard_id, + sync_hash: *sync_hash, + } + .with_span_context(), + ) .then(move |response| { let response = response.unwrap(); match response { NetworkViewClientResponses::StateResponse( response, ) => { - me.do_send(NetworkClientMessages::StateResponse(*response).with_span_context()); + me.do_send( + NetworkClientMessages::StateResponse( + *response, + ) + .with_span_context(), + ); } NetworkViewClientResponses::NoResponse => {} _ => assert!(false), @@ -906,18 +900,26 @@ pub fn setup_mock_all_validators( actix::spawn( connectors1[i] .1 - .send(NetworkViewClientMessages::StateRequestPart { - shard_id: *shard_id, - sync_hash: *sync_hash, - part_id: *part_id, - }.with_span_context()) + .send( + NetworkViewClientMessages::StateRequestPart { + shard_id: *shard_id, + sync_hash: *sync_hash, + part_id: *part_id, + } + .with_span_context(), + ) 
.then(move |response| { let response = response.unwrap(); match response { NetworkViewClientResponses::StateResponse( response, ) => { - me.do_send(NetworkClientMessages::StateResponse(*response).with_span_context()); + me.do_send( + NetworkClientMessages::StateResponse( + *response, + ) + .with_span_context(), + ); } NetworkViewClientResponses::NoResponse => {} _ => assert!(false), @@ -931,7 +933,10 @@ pub fn setup_mock_all_validators( NetworkRequests::StateResponse { route_back, response } => { for (i, address) in addresses.iter().enumerate() { if route_back == address { - connectors1[i].0.do_send(NetworkClientMessages::StateResponse(response.clone()).with_span_context()); + connectors1[i].0.do_send( + NetworkClientMessages::StateResponse(response.clone()) + .with_span_context(), + ); } } } @@ -944,9 +949,13 @@ pub fn setup_mock_all_validators( if aa.get(&key).is_none() { aa.insert(key); for (_, view_client) in connectors1 { - view_client.do_send(NetworkViewClientMessages::AnnounceAccount( - vec![(announce_account.clone(), None)], - ).with_span_context()) + view_client.do_send( + NetworkViewClientMessages::AnnounceAccount(vec![( + announce_account.clone(), + None, + )]) + .with_span_context(), + ) } } } @@ -975,7 +984,8 @@ pub fn setup_mock_all_validators( NetworkClientMessages::BlockApproval( approval.clone(), my_key_pair.id.clone(), - ).with_span_context(), + ) + .with_span_context(), ); } } @@ -1021,7 +1031,8 @@ pub fn setup_mock_all_validators( }; } resp - }).start(); + }) + .start(); let network_adapter = NetworkRecipient::default(); network_adapter.set_recipient(pm); let (block, client, view_client_addr) = setup( diff --git a/chain/client/src/view_client.rs b/chain/client/src/view_client.rs index 77dde26df38..4557883ac9a 100644 --- a/chain/client/src/view_client.rs +++ b/chain/client/src/view_client.rs @@ -1258,14 +1258,6 @@ impl Handler> for ViewClientActor { NetworkViewClientResponses::AnnounceAccount(filtered_announce_accounts) } - 
NetworkViewClientMessages::EpochSyncRequest { epoch_id: _epoch_id } => { - // TODO #3488 - NetworkViewClientResponses::NoResponse - } - NetworkViewClientMessages::EpochSyncFinalizationRequest { epoch_id: _epoch_id } => { - // TODO #3488 - NetworkViewClientResponses::NoResponse - } } } } diff --git a/chain/network/src/network_protocol/borsh.rs b/chain/network/src/network_protocol/borsh.rs index edd0a186588..e927bed763e 100644 --- a/chain/network/src/network_protocol/borsh.rs +++ b/chain/network/src/network_protocol/borsh.rs @@ -10,9 +10,7 @@ use near_primitives::block::{Block, BlockHeader, GenesisId}; use near_primitives::challenge::Challenge; use near_primitives::hash::CryptoHash; use near_primitives::network::{AnnounceAccount, PeerId}; -use near_primitives::syncing::{EpochSyncFinalizationResponse, EpochSyncResponse}; use near_primitives::transaction::SignedTransaction; -use near_primitives::types::EpochId; use std::fmt; use std::fmt::Formatter; @@ -134,12 +132,12 @@ pub(super) enum PeerMessage { /// Gracefully disconnect from other peer. 
Disconnect, Challenge(Challenge), - _HandshakeV2, - EpochSyncRequest(EpochId), - EpochSyncResponse(Box), - EpochSyncFinalizationRequest(EpochId), - EpochSyncFinalizationResponse(Box), + _HandshakeV2, + _EpochSyncRequest, + _EpochSyncResponse, + _EpochSyncFinalizationRequest, + _EpochSyncFinalizationResponse, _RoutingTableSyncV2, } #[cfg(target_arch = "x86_64")] // Non-x86_64 doesn't match this requirement yet but it's not bad as it's not production-ready diff --git a/chain/network/src/network_protocol/borsh_conv.rs b/chain/network/src/network_protocol/borsh_conv.rs index 59d659ae158..61dd2ae8fdc 100644 --- a/chain/network/src/network_protocol/borsh_conv.rs +++ b/chain/network/src/network_protocol/borsh_conv.rs @@ -96,6 +96,8 @@ pub enum ParsePeerMessageError { DeprecatedHandshakeV2, #[error("RoutingTableSyncV2 is deprecated")] DeprecatedRoutingTableSyncV2, + #[error("EpochSync is deprecated")] + DeprecatedEpochSync, } impl TryFrom<&net::PeerMessage> for mem::PeerMessage { @@ -127,15 +129,13 @@ impl TryFrom<&net::PeerMessage> for mem::PeerMessage { net::PeerMessage::Disconnect => mem::PeerMessage::Disconnect, net::PeerMessage::Challenge(c) => mem::PeerMessage::Challenge(c), net::PeerMessage::_HandshakeV2 => return Err(Self::Error::DeprecatedHandshakeV2), - net::PeerMessage::EpochSyncRequest(epoch_id) => { - mem::PeerMessage::EpochSyncRequest(epoch_id) + net::PeerMessage::_EpochSyncRequest => return Err(Self::Error::DeprecatedEpochSync), + net::PeerMessage::_EpochSyncResponse => return Err(Self::Error::DeprecatedEpochSync), + net::PeerMessage::_EpochSyncFinalizationRequest => { + return Err(Self::Error::DeprecatedEpochSync) } - net::PeerMessage::EpochSyncResponse(esr) => mem::PeerMessage::EpochSyncResponse(esr), - net::PeerMessage::EpochSyncFinalizationRequest(epoch_id) => { - mem::PeerMessage::EpochSyncFinalizationRequest(epoch_id) - } - net::PeerMessage::EpochSyncFinalizationResponse(esfr) => { - mem::PeerMessage::EpochSyncFinalizationResponse(esfr) + 
net::PeerMessage::_EpochSyncFinalizationResponse => { + return Err(Self::Error::DeprecatedEpochSync) } net::PeerMessage::_RoutingTableSyncV2 => { return Err(Self::Error::DeprecatedRoutingTableSyncV2) @@ -175,16 +175,6 @@ impl From<&mem::PeerMessage> for net::PeerMessage { mem::PeerMessage::Routed(r) => net::PeerMessage::Routed(Box::new(r.msg.clone())), mem::PeerMessage::Disconnect => net::PeerMessage::Disconnect, mem::PeerMessage::Challenge(c) => net::PeerMessage::Challenge(c), - mem::PeerMessage::EpochSyncRequest(epoch_id) => { - net::PeerMessage::EpochSyncRequest(epoch_id) - } - mem::PeerMessage::EpochSyncResponse(esr) => net::PeerMessage::EpochSyncResponse(esr), - mem::PeerMessage::EpochSyncFinalizationRequest(epoch_id) => { - net::PeerMessage::EpochSyncFinalizationRequest(epoch_id) - } - mem::PeerMessage::EpochSyncFinalizationResponse(esfr) => { - net::PeerMessage::EpochSyncFinalizationResponse(esfr) - } } } } diff --git a/chain/network/src/network_protocol/mod.rs b/chain/network/src/network_protocol/mod.rs index 0ef5d3fd122..51b3e31a3e9 100644 --- a/chain/network/src/network_protocol/mod.rs +++ b/chain/network/src/network_protocol/mod.rs @@ -32,7 +32,6 @@ use near_primitives::network::{AnnounceAccount, PeerId}; use near_primitives::sharding::{ ChunkHash, PartialEncodedChunk, PartialEncodedChunkPart, ReceiptProof, ShardChunkHeader, }; -use near_primitives::syncing::{EpochSyncFinalizationResponse, EpochSyncResponse}; use near_primitives::syncing::{ShardStateSyncResponse, ShardStateSyncResponseV1}; use near_primitives::transaction::SignedTransaction; use near_primitives::types::{AccountId, EpochId}; @@ -263,10 +262,6 @@ pub enum PeerMessage { /// Gracefully disconnect from other peer. 
Disconnect, Challenge(Challenge), - EpochSyncRequest(EpochId), - EpochSyncResponse(Box), - EpochSyncFinalizationRequest(EpochId), - EpochSyncFinalizationResponse(Box), } /* diff --git a/chain/network/src/network_protocol/network.proto b/chain/network/src/network_protocol/network.proto index 571b52f39c2..01a7ec0fa33 100644 --- a/chain/network/src/network_protocol/network.proto +++ b/chain/network/src/network_protocol/network.proto @@ -316,28 +316,6 @@ message Challenge { bytes borsh = 1; } -// TODO: document it -message EpochSyncRequest { - CryptoHash epoch_id = 1; -} - -// Wrapper of borsh-encoded EpochSyncResponse -// https://github.com/near/nearcore/blob/1a4edefd0116f7d1e222bc96569367a02fe64199/core/primitives/src/syncing.rs#L225 -message EpochSyncResponse { - bytes borsh = 1; -} - -// TODO: document it -message EpochSyncFinalizationRequest { - CryptoHash epoch_id = 1; -} - -// Wrapper of borsh-encoded EpochSyncFinalizationResponse -// https://github.com/near/nearcore/blob/1a4edefd0116f7d1e222bc96569367a02fe64199/core/primitives/src/syncing.rs#L202 -message EpochSyncFinalizationResponse { - bytes borsh = 1; -} - // Wrapper of borsh-encoded RoutingSyncV2 // https://github.com/near/nearcore/blob/1a4edefd0116f7d1e222bc96569367a02fe64199/chain/network/src/network_protocol.rs#L225 message RoutingSyncV2 { @@ -354,7 +332,7 @@ message PeerMessage { // https://docs.google.com/document/d/1gCWmt9O-h_-5JDXIqbKxAaSS3Q9pryB1f9DDY1mMav4/edit reserved 1,2,3; // Deprecated fields. 
- reserved 24; + reserved 20,21,22,23,24; bytes trace_context = 26; @@ -382,10 +360,5 @@ message PeerMessage { RoutedMessage routed = 17; Disconnect disconnect = 18; Challenge challenge = 19; - - EpochSyncRequest epoch_sync_request = 20; - EpochSyncResponse epoch_sync_response = 21; - EpochSyncFinalizationRequest epoch_sync_finalization_request = 22; - EpochSyncFinalizationResponse epoch_sync_finalization_response = 23; } } diff --git a/chain/network/src/network_protocol/proto_conv/peer_message.rs b/chain/network/src/network_protocol/proto_conv/peer_message.rs index cdec63ece78..778e43eb789 100644 --- a/chain/network/src/network_protocol/proto_conv/peer_message.rs +++ b/chain/network/src/network_protocol/proto_conv/peer_message.rs @@ -9,9 +9,7 @@ use crate::time::error::ComponentRange; use borsh::{BorshDeserialize as _, BorshSerialize as _}; use near_primitives::block::{Block, BlockHeader}; use near_primitives::challenge::Challenge; -use near_primitives::syncing::{EpochSyncFinalizationResponse, EpochSyncResponse}; use near_primitives::transaction::SignedTransaction; -use near_primitives::types::EpochId; use protobuf::MessageField as MF; use std::sync::Arc; @@ -155,30 +153,6 @@ impl From<&PeerMessage> for proto::PeerMessage { borsh: r.try_to_vec().unwrap(), ..Default::default() }), - PeerMessage::EpochSyncRequest(epoch_id) => { - ProtoMT::EpochSyncRequest(proto::EpochSyncRequest { - epoch_id: MF::some((&epoch_id.0).into()), - ..Default::default() - }) - } - PeerMessage::EpochSyncResponse(esr) => { - ProtoMT::EpochSyncResponse(proto::EpochSyncResponse { - borsh: esr.try_to_vec().unwrap(), - ..Default::default() - }) - } - PeerMessage::EpochSyncFinalizationRequest(epoch_id) => { - ProtoMT::EpochSyncFinalizationRequest(proto::EpochSyncFinalizationRequest { - epoch_id: MF::some((&epoch_id.0).into()), - ..Default::default() - }) - } - PeerMessage::EpochSyncFinalizationResponse(esfr) => { - ProtoMT::EpochSyncFinalizationResponse(proto::EpochSyncFinalizationResponse { - 
borsh: esfr.try_to_vec().unwrap(), - ..Default::default() - }) - } }), ..Default::default() } @@ -188,8 +162,6 @@ impl From<&PeerMessage> for proto::PeerMessage { pub type ParseTransactionError = borsh::maybestd::io::Error; pub type ParseRoutedError = borsh::maybestd::io::Error; pub type ParseChallengeError = borsh::maybestd::io::Error; -pub type ParseEpochSyncResponseError = borsh::maybestd::io::Error; -pub type ParseEpochSyncFinalizationResponseError = borsh::maybestd::io::Error; #[derive(thiserror::Error, Debug)] pub enum ParsePeerMessageError { @@ -223,14 +195,6 @@ pub enum ParsePeerMessageError { Routed(ParseRoutedError), #[error("challenge: {0}")] Challenge(ParseChallengeError), - #[error("epoch_sync_request: {0}")] - EpochSyncRequest(ParseRequiredError), - #[error("epoch_sync_response: {0}")] - EpochSyncResponse(ParseEpochSyncResponseError), - #[error("epoch_sync_finalization_request: {0}")] - EpochSyncFinalizationRequest(ParseRequiredError), - #[error("epoch_sync_finalization_response: {0}")] - EpochSyncFinalizationResponse(ParseEpochSyncFinalizationResponseError), #[error("routed_created_at: {0}")] RoutedCreatedAtTimestamp(ComponentRange), #[error("sync_accounts_data: {0}")] @@ -302,25 +266,6 @@ impl TryFrom<&proto::PeerMessage> for PeerMessage { ProtoMT::Challenge(c) => PeerMessage::Challenge( Challenge::try_from_slice(&c.borsh).map_err(Self::Error::Challenge)?, ), - ProtoMT::EpochSyncRequest(esr) => PeerMessage::EpochSyncRequest(EpochId( - try_from_required(&esr.epoch_id).map_err(Self::Error::EpochSyncRequest)?, - )), - ProtoMT::EpochSyncResponse(esr) => PeerMessage::EpochSyncResponse(Box::new( - EpochSyncResponse::try_from_slice(&esr.borsh) - .map_err(Self::Error::EpochSyncResponse)?, - )), - ProtoMT::EpochSyncFinalizationRequest(esr) => { - PeerMessage::EpochSyncFinalizationRequest(EpochId( - try_from_required(&esr.epoch_id) - .map_err(Self::Error::EpochSyncFinalizationRequest)?, - )) - } - ProtoMT::EpochSyncFinalizationResponse(esr) => { - 
PeerMessage::EpochSyncFinalizationResponse(Box::new( - EpochSyncFinalizationResponse::try_from_slice(&esr.borsh) - .map_err(Self::Error::EpochSyncFinalizationResponse)?, - )) - } }) } } diff --git a/chain/network/src/network_protocol/tests.rs b/chain/network/src/network_protocol/tests.rs index c05d379b4e0..f1bb9ba4ba4 100644 --- a/chain/network/src/network_protocol/tests.rs +++ b/chain/network/src/network_protocol/tests.rs @@ -6,8 +6,6 @@ use crate::time; use crate::types::{HandshakeFailureReason, PeerMessage}; use crate::types::{PartialEncodedChunkRequestMsg, PartialEncodedChunkResponseMsg}; use anyhow::{bail, Context as _}; -use near_primitives::syncing::EpochSyncResponse; -use near_primitives::types::EpochId; #[test] fn bad_account_data_size() { @@ -58,7 +56,6 @@ fn serialize_deserialize() -> anyhow::Result<()> { let a = data::make_signer(&mut rng); let b = data::make_signer(&mut rng); let edge = data::make_edge(&a, &b); - let epoch_id = EpochId(chain.blocks[1].hash().clone()); let chunk_hash = chain.blocks[3].chunks()[0].chunk_hash(); let routed_message1 = Box::new(data::make_routed_message( @@ -98,10 +95,6 @@ fn serialize_deserialize() -> anyhow::Result<()> { PeerMessage::Routed(routed_message2), PeerMessage::Disconnect, PeerMessage::Challenge(data::make_challenge(&mut rng)), - PeerMessage::EpochSyncRequest(epoch_id.clone()), - PeerMessage::EpochSyncResponse(Box::new(EpochSyncResponse::UpToDate)), - PeerMessage::EpochSyncFinalizationRequest(epoch_id), - // TODO: EpochSyncFinalizationResponse ]; // Check that serialize;deserialize = 1 diff --git a/chain/network/src/peer_manager/peer_manager_actor.rs b/chain/network/src/peer_manager/peer_manager_actor.rs index 2cb7c061168..b59e6135905 100644 --- a/chain/network/src/peer_manager/peer_manager_actor.rs +++ b/chain/network/src/peer_manager/peer_manager_actor.rs @@ -1096,29 +1096,6 @@ impl PeerManagerActor { NetworkResponses::RouteNotFound } } - // unused: epoch sync is not implemented - 
NetworkRequests::EpochSyncRequest { peer_id, epoch_id } => { - if self - .state - .tier2 - .send_message(peer_id, Arc::new(PeerMessage::EpochSyncRequest(epoch_id))) - { - NetworkResponses::NoResponse - } else { - NetworkResponses::RouteNotFound - } - } - // unused: epoch sync is not implemented - NetworkRequests::EpochSyncFinalizationRequest { peer_id, epoch_id } => { - if self.state.tier2.send_message( - peer_id, - Arc::new(PeerMessage::EpochSyncFinalizationRequest(epoch_id)), - ) { - NetworkResponses::NoResponse - } else { - NetworkResponses::RouteNotFound - } - } NetworkRequests::BanPeer { peer_id, ban_reason } => { self.try_ban_peer(&peer_id, ban_reason); NetworkResponses::NoResponse diff --git a/chain/network/src/testonly/fake_client.rs b/chain/network/src/testonly/fake_client.rs index 32ec8c08bc0..7aca62f78a7 100644 --- a/chain/network/src/testonly/fake_client.rs +++ b/chain/network/src/testonly/fake_client.rs @@ -8,7 +8,6 @@ use near_primitives::challenge::Challenge; use near_primitives::hash::CryptoHash; use near_primitives::network::AnnounceAccount; use near_primitives::sharding::{ChunkHash, PartialEncodedChunkPart}; -use near_primitives::syncing::EpochSyncResponse; use near_primitives::transaction::SignedTransaction; use near_primitives::types::EpochId; @@ -22,9 +21,6 @@ pub enum Event { ChunkRequest(ChunkHash), Transaction(SignedTransaction), Challenge(Challenge), - EpochSyncRequest(EpochId), - EpochSyncResponse(EpochSyncResponse), - EpochSyncFinalizationRequest(EpochId), AnnounceAccount(Vec<(AnnounceAccount, Option)>), } @@ -57,14 +53,6 @@ impl actix::Handler> for Actor { self.event_sink.push(Event::BlockHeadersRequest(req)); NetworkViewClientResponses::NoResponse } - NetworkViewClientMessages::EpochSyncRequest { epoch_id } => { - self.event_sink.push(Event::EpochSyncRequest(epoch_id)); - NetworkViewClientResponses::NoResponse - } - NetworkViewClientMessages::EpochSyncFinalizationRequest { epoch_id } => { - 
self.event_sink.push(Event::EpochSyncFinalizationRequest(epoch_id)); - NetworkViewClientResponses::NoResponse - } NetworkViewClientMessages::AnnounceAccount(aas) => { self.event_sink.push(Event::AnnounceAccount(aas.clone())); NetworkViewClientResponses::AnnounceAccount(aas.into_iter().map(|a| a.0).collect()) @@ -103,9 +91,6 @@ impl actix::Handler> for Actor { resp = NetworkClientResponses::ValidTx; } NetworkClientMessages::Challenge(c) => self.event_sink.push(Event::Challenge(c)), - NetworkClientMessages::EpochSyncResponse(_, resp) => { - self.event_sink.push(Event::EpochSyncResponse(*resp)) - } NetworkClientMessages::NetworkInfo(_) => {} msg => { let msg_type: &'static str = msg.into(); diff --git a/chain/network/src/types.rs b/chain/network/src/types.rs index 836a7712308..2f8e9c8be55 100644 --- a/chain/network/src/types.rs +++ b/chain/network/src/types.rs @@ -15,7 +15,6 @@ use near_primitives::errors::InvalidTxError; use near_primitives::hash::CryptoHash; use near_primitives::network::{AnnounceAccount, PeerId}; use near_primitives::sharding::{PartialEncodedChunk, PartialEncodedChunkWithArcReceipts}; -use near_primitives::syncing::{EpochSyncFinalizationResponse, EpochSyncResponse}; use near_primitives::transaction::SignedTransaction; use near_primitives::types::BlockHeight; use near_primitives::types::{AccountId, EpochId, ShardId}; @@ -70,9 +69,6 @@ pub enum ReasonForBan { InvalidPeerId = 8, InvalidHash = 9, InvalidEdge = 10, - EpochSyncNoResponse = 11, - EpochSyncInvalidResponse = 12, - EpochSyncInvalidFinalizationResponse = 13, Blacklisted = 14, } @@ -236,29 +232,15 @@ impl From for PeerManagerMessageResponse { #[allow(clippy::large_enum_variant)] pub enum NetworkRequests { /// Sends block, either when block was just produced or when requested. - Block { - block: Block, - }, + Block { block: Block }, /// Sends approval. 
- Approval { - approval_message: ApprovalMessage, - }, + Approval { approval_message: ApprovalMessage }, /// Request block with given hash from given peer. - BlockRequest { - hash: CryptoHash, - peer_id: PeerId, - }, + BlockRequest { hash: CryptoHash, peer_id: PeerId }, /// Request given block headers. - BlockHeadersRequest { - hashes: Vec, - peer_id: PeerId, - }, + BlockHeadersRequest { hashes: Vec, peer_id: PeerId }, /// Request state header for given shard at given state root. - StateRequestHeader { - shard_id: ShardId, - sync_hash: CryptoHash, - target: AccountOrPeerIdOrHash, - }, + StateRequestHeader { shard_id: ShardId, sync_hash: CryptoHash, target: AccountOrPeerIdOrHash }, /// Request state part for given shard at given state root. StateRequestPart { shard_id: ShardId, @@ -267,23 +249,9 @@ pub enum NetworkRequests { target: AccountOrPeerIdOrHash, }, /// Response to state request. - StateResponse { - route_back: CryptoHash, - response: StateResponseInfo, - }, - EpochSyncRequest { - peer_id: PeerId, - epoch_id: EpochId, - }, - EpochSyncFinalizationRequest { - peer_id: PeerId, - epoch_id: EpochId, - }, + StateResponse { route_back: CryptoHash, response: StateResponseInfo }, /// Ban given peer. 
- BanPeer { - peer_id: PeerId, - ban_reason: ReasonForBan, - }, + BanPeer { peer_id: PeerId, ban_reason: ReasonForBan }, /// Announce account AnnounceAccount(AnnounceAccount), @@ -294,20 +262,14 @@ pub enum NetworkRequests { create_time: time::Instant, }, /// Information about chunk such as its header, some subset of parts and/or incoming receipts - PartialEncodedChunkResponse { - route_back: CryptoHash, - response: PartialEncodedChunkResponseMsg, - }, + PartialEncodedChunkResponse { route_back: CryptoHash, response: PartialEncodedChunkResponseMsg }, /// Information about chunk such as its header, some subset of parts and/or incoming receipts PartialEncodedChunkMessage { account_id: AccountId, partial_encoded_chunk: PartialEncodedChunkWithArcReceipts, }, /// Forwarding a chunk part to a validator tracking the shard - PartialEncodedChunkForward { - account_id: AccountId, - forward: PartialEncodedChunkForwardMsg, - }, + PartialEncodedChunkForward { account_id: AccountId, forward: PartialEncodedChunkForwardMsg }, /// Valid transaction but since we are not validators we send this transaction to current validators. ForwardTx(AccountId, SignedTransaction), @@ -459,10 +421,6 @@ pub enum NetworkClientMessages { BlockApproval(Approval, PeerId), /// State response. StateResponse(StateResponseInfo), - /// Epoch Sync response for light client block request - EpochSyncResponse(PeerId, Box), - /// Epoch Sync response for finalization request - EpochSyncFinalizationResponse(PeerId, Box), /// Request chunk parts and/or receipts. PartialEncodedChunkRequest(PartialEncodedChunkRequestMsg, CryptoHash), @@ -697,10 +655,6 @@ pub enum NetworkViewClientMessages { StateRequestHeader { shard_id: ShardId, sync_hash: CryptoHash }, /// State request part. 
StateRequestPart { shard_id: ShardId, sync_hash: CryptoHash, part_id: u64 }, - /// A request for a light client info during Epoch Sync - EpochSyncRequest { epoch_id: EpochId }, - /// A request for headers and proofs during Epoch Sync - EpochSyncFinalizationRequest { epoch_id: EpochId }, /// Account announcements that needs to be validated before being processed. /// They are paired with last epoch id known to this announcement, in order to accept only /// newer announcements. @@ -719,10 +673,6 @@ pub enum NetworkViewClientResponses { StateResponse(Box), /// Valid announce accounts. AnnounceAccount(Vec), - /// A response to a request for a light client block during Epoch Sync - EpochSyncResponse(Box), - /// A response to a request for headers and proofs during Epoch Sync - EpochSyncFinalizationResponse(Box), /// Ban peer for malicious behavior. Ban { ban_reason: ReasonForBan }, /// Response not needed diff --git a/tools/chainsync-loadtest/src/network.rs b/tools/chainsync-loadtest/src/network.rs index 6a785786618..e77b19a13fc 100644 --- a/tools/chainsync-loadtest/src/network.rs +++ b/tools/chainsync-loadtest/src/network.rs @@ -310,10 +310,6 @@ impl Handler> for FakeClientActor { NetworkViewClientMessages::BlockHeadersRequest(_) => "BlockHeadersRequest", NetworkViewClientMessages::StateRequestHeader { .. } => "StateRequestHeader", NetworkViewClientMessages::StateRequestPart { .. } => "StateRequestPart", - NetworkViewClientMessages::EpochSyncRequest { .. } => "EpochSyncRequest", - NetworkViewClientMessages::EpochSyncFinalizationRequest { .. 
} => { - "EpochSyncFinalizationRequest" - } NetworkViewClientMessages::AnnounceAccount(_) => { return NetworkViewClientResponses::NoResponse; } From 2667e4df2440955c09dd0fc66d379b34e9ec586d Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Mon, 24 Oct 2022 16:02:05 +0100 Subject: [PATCH 013/103] Prefer implementing `Display` to `From` for `String` (#7914) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There’s still `impl From for String`. It’s left intentionally as it avoids string allocation when used compared to using Display. --- chain/client-primitives/src/types.rs | 16 ---------------- core/crypto/src/signature.rs | 27 ++++++++------------------- 2 files changed, 8 insertions(+), 35 deletions(-) diff --git a/chain/client-primitives/src/types.rs b/chain/client-primitives/src/types.rs index 369046ae86a..785262a87bd 100644 --- a/chain/client-primitives/src/types.rs +++ b/chain/client-primitives/src/types.rs @@ -600,22 +600,6 @@ pub enum TxStatusError { TimeoutError, } -impl From for String { - fn from(error: TxStatusError) -> Self { - match error { - TxStatusError::ChainError(err) => format!("Chain error: {}", err), - TxStatusError::MissingTransaction(tx_hash) => { - format!("Transaction {} doesn't exist", tx_hash) - } - TxStatusError::InternalError(debug_message) => { - format!("Internal error: {}", debug_message) - } - TxStatusError::TimeoutError => format!("Timeout error"), - TxStatusError::InvalidTx(e) => format!("Invalid transaction: {}", e), - } - } -} - impl Message for TxStatus { type Result = Result, TxStatusError>; } diff --git a/core/crypto/src/signature.rs b/core/crypto/src/signature.rs index 3a31a6211f1..9b123085330 100644 --- a/core/crypto/src/signature.rs +++ b/core/crypto/src/signature.rs @@ -255,14 +255,18 @@ impl Hash for PublicKey { } impl Display for PublicKey { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { - write!(f, "{}", String::from(self)) + fn fmt(&self, 
fmt: &mut Formatter) -> std::fmt::Result { + let (key_type, key_data) = match self { + PublicKey::ED25519(public_key) => (KeyType::ED25519, &public_key.0[..]), + PublicKey::SECP256K1(public_key) => (KeyType::SECP256K1, &public_key.0[..]), + }; + write!(fmt, "{}:{}", key_type, bs58::encode(key_data).into_string()) } } impl Debug for PublicKey { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { - write!(f, "{}", String::from(self)) + Display::fmt(self, f) } } @@ -305,7 +309,7 @@ impl serde::Serialize for PublicKey { where S: serde::Serializer, { - serializer.serialize_str(&String::from(self)) + serializer.collect_str(self) } } @@ -320,21 +324,6 @@ impl<'de> serde::Deserialize<'de> for PublicKey { } } -impl From<&PublicKey> for String { - fn from(public_key: &PublicKey) -> Self { - match public_key { - PublicKey::ED25519(public_key) => { - format!("{}:{}", KeyType::ED25519, bs58::encode(&public_key.0).into_string()) - } - PublicKey::SECP256K1(public_key) => format!( - "{}:{}", - KeyType::SECP256K1, - bs58::encode(&public_key.0.to_vec()).into_string() - ), - } - } -} - impl FromStr for PublicKey { type Err = crate::errors::ParseKeyError; From 7698da9df7151f0d314451cece64a13ecbfced2b Mon Sep 17 00:00:00 2001 From: pompon0 Date: Mon, 24 Oct 2022 17:30:48 +0200 Subject: [PATCH 014/103] replaced Client struct with async_trait (#7913) The concrete implementation wrapping ClientActor and ViewClientActor has been moved to near_client crate. Network(View)ClientMessage will be moved to near_client crate in a separate PR. 
--- Cargo.lock | 25 +- Cargo.toml | 1 + chain/client/Cargo.toml | 1 + chain/client/src/adapter.rs | 347 ++++++++++++ chain/client/src/lib.rs | 1 + chain/network/Cargo.toml | 1 + chain/network/src/client.rs | 496 ++++-------------- chain/network/src/peer/peer_actor.rs | 149 ++---- chain/network/src/peer/testonly.rs | 5 +- .../network/src/peer_manager/network_state.rs | 4 +- .../src/peer_manager/peer_manager_actor.rs | 2 +- chain/network/src/peer_manager/testonly.rs | 12 +- chain/network/src/test_utils.rs | 61 --- chain/network/src/testonly/fake_client.rs | 185 ++++--- .../src/tests/network/peer_handshake.rs | 17 +- integration-tests/src/tests/network/runner.rs | 2 +- .../src/tests/network/stress_network.rs | 17 +- nearcore/src/lib.rs | 5 +- tools/chainsync-loadtest/Cargo.toml | 1 + tools/chainsync-loadtest/src/main.rs | 12 +- tools/chainsync-loadtest/src/network.rs | 196 +++---- 21 files changed, 736 insertions(+), 804 deletions(-) create mode 100644 chain/client/src/adapter.rs diff --git a/Cargo.lock b/Cargo.lock index 88db047e438..934aa709e85 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -383,9 +383,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.53" +version = "0.1.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed6aa3524a2dfcf9fe180c51eae2b58738348d819517ceadf95789c51fff7600" +checksum = "1e805d94e6b5001b651426cf4cd446b1ab5f319d27bab5c644f61de0a804360c" dependencies = [ "proc-macro2", "quote", @@ -878,6 +878,7 @@ version = "0.0.0" dependencies = [ "actix", "anyhow", + "async-trait", "clap 3.1.18", "dirs", "futures", @@ -2928,6 +2929,7 @@ dependencies = [ "actix-rt", "ansi_term", "assert_matches", + "async-trait", "borsh", "chrono", "delay-detector", @@ -3222,6 +3224,7 @@ dependencies = [ "anyhow", "arc-swap", "assert_matches", + "async-trait", "borsh", "bytes", "bytesize", @@ -4288,11 +4291,11 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" [[package]] name = "proc-macro2" 
-version = "1.0.38" +version = "1.0.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9027b48e9d4c9175fa2218adf3557f91c1137021739951d4932f5f8268ac48aa" +checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" dependencies = [ - "unicode-xid", + "unicode-ident", ] [[package]] @@ -5483,13 +5486,13 @@ checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" [[package]] name = "syn" -version = "1.0.94" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a07e33e919ebcd69113d5be0e4d70c5707004ff45188910106854f38b960df4a" +checksum = "a864042229133ada95abf3b54fdc62ef5ccabe9515b64717bcb9a1919e59445d" dependencies = [ "proc-macro2", "quote", - "unicode-xid", + "unicode-ident", ] [[package]] @@ -6042,6 +6045,12 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" +[[package]] +name = "unicode-ident" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" + [[package]] name = "unicode-normalization" version = "0.1.19" diff --git a/Cargo.toml b/Cargo.toml index 60a4ac317f8..12983b5c60a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -74,6 +74,7 @@ arc-swap = "1.5" arrayref = "0.3" assert_matches = "1.5.0" async-recursion = "0.3.2" +async-trait = "0.1.58" atty = "0.2" awc = { version = "3", features = ["openssl"] } backtrace = "0.3.64" diff --git a/chain/client/Cargo.toml b/chain/client/Cargo.toml index 82abfb0d52d..2d991d553b2 100644 --- a/chain/client/Cargo.toml +++ b/chain/client/Cargo.toml @@ -11,6 +11,7 @@ edition.workspace = true actix-rt.workspace = true actix.workspace = true ansi_term.workspace = true +async-trait.workspace = true borsh.workspace = true chrono.workspace = true futures.workspace = true diff --git 
a/chain/client/src/adapter.rs b/chain/client/src/adapter.rs new file mode 100644 index 00000000000..de2c1f818a4 --- /dev/null +++ b/chain/client/src/adapter.rs @@ -0,0 +1,347 @@ +use crate::client_actor::ClientActor; +use crate::view_client::ViewClientActor; +use near_network::time; +use near_network::types::{ + NetworkClientMessages, NetworkClientResponses, NetworkInfo, NetworkViewClientMessages, + NetworkViewClientResponses, PartialEncodedChunkForwardMsg, PartialEncodedChunkRequestMsg, + PartialEncodedChunkResponseMsg, ReasonForBan, StateResponseInfo, +}; +use near_o11y::WithSpanContextExt; +use near_primitives::block::{Approval, Block, BlockHeader}; +use near_primitives::challenge::Challenge; +use near_primitives::hash::CryptoHash; +use near_primitives::network::{AnnounceAccount, PeerId}; +use near_primitives::sharding::PartialEncodedChunk; +use near_primitives::transaction::SignedTransaction; +use near_primitives::types::{AccountId, EpochId, ShardId}; +use near_primitives::views::FinalExecutionOutcomeView; + +pub struct Adapter { + /// Address of the client actor. + client_addr: actix::Addr, + /// Address of the view client actor. 
+ view_client_addr: actix::Addr, +} + +impl Adapter { + pub fn new( + client_addr: actix::Addr, + view_client_addr: actix::Addr, + ) -> Self { + Self { client_addr, view_client_addr } + } +} + +#[async_trait::async_trait] +impl near_network::client::Client for Adapter { + async fn tx_status_request( + &self, + account_id: AccountId, + tx_hash: CryptoHash, + ) -> Option> { + match self + .view_client_addr + .send( + NetworkViewClientMessages::TxStatus { + tx_hash: tx_hash, + signer_account_id: account_id, + } + .with_span_context(), + ) + .await + { + Ok(NetworkViewClientResponses::TxStatus(tx_result)) => Some(tx_result), + Ok(NetworkViewClientResponses::NoResponse) => None, + Ok(resp) => panic!("unexpected ViewClientResponse: {resp:?}"), + Err(err) => { + tracing::error!("mailbox error: {err}"); + None + } + } + } + + async fn tx_status_response(&self, tx_result: FinalExecutionOutcomeView) { + match self + .view_client_addr + .send( + NetworkViewClientMessages::TxStatusResponse(Box::new(tx_result.clone())) + .with_span_context(), + ) + .await + { + Ok(NetworkViewClientResponses::NoResponse) => {} + Ok(resp) => panic!("unexpected ViewClientResponse: {resp:?}"), + Err(err) => tracing::error!("mailbox error: {err}"), + } + } + + async fn state_request_header( + &self, + shard_id: ShardId, + sync_hash: CryptoHash, + ) -> Result, ReasonForBan> { + match self + .view_client_addr + .send( + NetworkViewClientMessages::StateRequestHeader { + shard_id: shard_id, + sync_hash: sync_hash, + } + .with_span_context(), + ) + .await + { + Ok(NetworkViewClientResponses::StateResponse(resp)) => Ok(Some(*resp)), + Ok(NetworkViewClientResponses::NoResponse) => Ok(None), + Ok(NetworkViewClientResponses::Ban { ban_reason }) => Err(ban_reason), + Ok(resp) => panic!("unexpected ViewClientResponse: {resp:?}"), + Err(err) => { + tracing::error!("mailbox error: {err}"); + Ok(None) + } + } + } + + async fn state_request_part( + &self, + shard_id: ShardId, + sync_hash: CryptoHash, + part_id: 
u64, + ) -> Result, ReasonForBan> { + match self + .view_client_addr + .send( + NetworkViewClientMessages::StateRequestPart { + shard_id: shard_id, + sync_hash: sync_hash, + part_id: part_id, + } + .with_span_context(), + ) + .await + { + Ok(NetworkViewClientResponses::StateResponse(resp)) => Ok(Some(*resp)), + Ok(NetworkViewClientResponses::NoResponse) => Ok(None), + Ok(NetworkViewClientResponses::Ban { ban_reason }) => Err(ban_reason), + Ok(resp) => panic!("unexpected ViewClientResponse: {resp:?}"), + Err(err) => { + tracing::error!("mailbox error: {err}"); + Ok(None) + } + } + } + + async fn state_response(&self, info: StateResponseInfo) { + match self + .client_addr + .send(NetworkClientMessages::StateResponse(info).with_span_context()) + .await + { + Ok(NetworkClientResponses::NoResponse) => {} + Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), + Err(err) => tracing::error!("mailbox error: {err}"), + } + } + + async fn block_approval(&self, approval: Approval, peer_id: PeerId) { + match self + .client_addr + .send(NetworkClientMessages::BlockApproval(approval, peer_id).with_span_context()) + .await + { + Ok(NetworkClientResponses::NoResponse) => {} + Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), + Err(err) => tracing::error!("mailbox error: {err}"), + } + } + + async fn transaction(&self, transaction: SignedTransaction, is_forwarded: bool) { + match self + .client_addr + .send( + NetworkClientMessages::Transaction { transaction, is_forwarded, check_only: false } + .with_span_context(), + ) + .await + { + // Almost all variants of NetworkClientResponse are used only in response + // to NetworkClientMessages::Transaction (except for Ban). It will be clearer + // once NetworkClientMessage is split into separate requests. + Ok(resp @ NetworkClientResponses::Ban { .. 
}) => { + panic!("unexpected ClientResponse: {resp:?}") + } + Ok(_) => {} + Err(err) => tracing::error!("mailbox error: {err}"), + } + } + + async fn partial_encoded_chunk_request( + &self, + req: PartialEncodedChunkRequestMsg, + msg_hash: CryptoHash, + ) { + match self + .client_addr + .send( + NetworkClientMessages::PartialEncodedChunkRequest(req, msg_hash) + .with_span_context(), + ) + .await + { + Ok(NetworkClientResponses::NoResponse) => {} + Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), + Err(err) => tracing::error!("mailbox error: {err}"), + } + } + + async fn partial_encoded_chunk_response( + &self, + resp: PartialEncodedChunkResponseMsg, + timestamp: time::Instant, + ) { + match self + .client_addr + .send( + NetworkClientMessages::PartialEncodedChunkResponse(resp, timestamp.into()) + .with_span_context(), + ) + .await + { + Ok(NetworkClientResponses::NoResponse) => {} + Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), + Err(err) => tracing::error!("mailbox error: {err}"), + } + } + + async fn partial_encoded_chunk(&self, chunk: PartialEncodedChunk) { + match self + .client_addr + .send(NetworkClientMessages::PartialEncodedChunk(chunk).with_span_context()) + .await + { + Ok(NetworkClientResponses::NoResponse) => {} + Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), + Err(err) => tracing::error!("mailbox error: {err}"), + } + } + + async fn partial_encoded_chunk_forward(&self, msg: PartialEncodedChunkForwardMsg) { + match self + .client_addr + .send(NetworkClientMessages::PartialEncodedChunkForward(msg).with_span_context()) + .await + { + Ok(NetworkClientResponses::NoResponse) => {} + Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), + Err(err) => tracing::error!("mailbox error: {err}"), + } + } + + async fn block_request(&self, hash: CryptoHash) -> Option> { + match self + .view_client_addr + .send(NetworkViewClientMessages::BlockRequest(hash).with_span_context()) + .await + { + 
Ok(NetworkViewClientResponses::Block(block)) => Some(block), + Ok(NetworkViewClientResponses::NoResponse) => None, + Ok(resp) => panic!("unexpected ViewClientResponse: {resp:?}"), + Err(err) => { + tracing::error!("mailbox error: {err}"); + None + } + } + } + + async fn block_headers_request(&self, hashes: Vec) -> Option> { + match self + .view_client_addr + .send(NetworkViewClientMessages::BlockHeadersRequest(hashes).with_span_context()) + .await + { + Ok(NetworkViewClientResponses::BlockHeaders(block_headers)) => Some(block_headers), + Ok(NetworkViewClientResponses::NoResponse) => None, + Ok(resp) => panic!("unexpected ViewClientResponse: {resp:?}"), + Err(err) => { + tracing::error!("mailbox error: {err}"); + None + } + } + } + + async fn block(&self, block: Block, peer_id: PeerId, was_requested: bool) { + match self + .client_addr + .send(NetworkClientMessages::Block(block, peer_id, was_requested).with_span_context()) + .await + { + Ok(NetworkClientResponses::NoResponse) => {} + Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), + Err(err) => tracing::error!("mailbox error: {err}"), + } + } + + async fn block_headers( + &self, + headers: Vec, + peer_id: PeerId, + ) -> Result<(), ReasonForBan> { + match self + .client_addr + .send(NetworkClientMessages::BlockHeaders(headers, peer_id).with_span_context()) + .await + { + Ok(NetworkClientResponses::NoResponse) => Ok(()), + Ok(NetworkClientResponses::Ban { ban_reason }) => Err(ban_reason), + Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), + Err(err) => { + tracing::error!("mailbox error: {err}"); + Ok(()) + } + } + } + + async fn challenge(&self, challenge: Challenge) { + match self + .client_addr + .send(NetworkClientMessages::Challenge(challenge).with_span_context()) + .await + { + Ok(NetworkClientResponses::NoResponse) => {} + Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), + Err(err) => tracing::error!("mailbox error: {err}"), + } + } + + async fn network_info(&self, info: 
NetworkInfo) { + match self + .client_addr + .send(NetworkClientMessages::NetworkInfo(info).with_span_context()) + .await + { + Ok(NetworkClientResponses::NoResponse) => {} + Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), + Err(err) => tracing::error!("mailbox error: {err}"), + } + } + + async fn announce_account( + &self, + accounts: Vec<(AnnounceAccount, Option)>, + ) -> Result, ReasonForBan> { + match self + .view_client_addr + .send(NetworkViewClientMessages::AnnounceAccount(accounts).with_span_context()) + .await + { + Ok(NetworkViewClientResponses::AnnounceAccount(accounts)) => Ok(accounts), + Ok(NetworkViewClientResponses::NoResponse) => Ok(vec![]), + Ok(NetworkViewClientResponses::Ban { ban_reason }) => Err(ban_reason), + Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), + Err(err) => { + tracing::error!("mailbox error: {err}"); + Ok(vec![]) + } + } + } +} diff --git a/chain/client/src/lib.rs b/chain/client/src/lib.rs index 60b783fe074..02bff5d5fa6 100644 --- a/chain/client/src/lib.rs +++ b/chain/client/src/lib.rs @@ -13,6 +13,7 @@ pub use crate::client::Client; pub use crate::client_actor::{start_client, ClientActor}; pub use crate::view_client::{start_view_client, ViewClientActor}; +pub mod adapter; pub mod adversarial; mod client; mod client_actor; diff --git a/chain/network/Cargo.toml b/chain/network/Cargo.toml index ac55091664b..ff62defa2b5 100644 --- a/chain/network/Cargo.toml +++ b/chain/network/Cargo.toml @@ -12,6 +12,7 @@ anyhow.workspace = true protobuf-codegen.workspace = true [dependencies] +async-trait.workspace = true actix.workspace = true anyhow.workspace = true arc-swap.workspace = true diff --git a/chain/network/src/client.rs b/chain/network/src/client.rs index 4c19f8b711c..78783bf617a 100644 --- a/chain/network/src/client.rs +++ b/chain/network/src/client.rs @@ -2,11 +2,7 @@ use crate::network_protocol::{ PartialEncodedChunkForwardMsg, PartialEncodedChunkRequestMsg, PartialEncodedChunkResponseMsg, StateResponseInfo, 
}; -use crate::types::{ - NetworkClientMessages, NetworkClientResponses, NetworkInfo, NetworkViewClientMessages, - NetworkViewClientResponses, ReasonForBan, -}; -use near_o11y::{WithSpanContext, WithSpanContextExt}; +use crate::types::{NetworkInfo, ReasonForBan}; use near_primitives::block::{Approval, Block, BlockHeader}; use near_primitives::challenge::Challenge; use near_primitives::hash::CryptoHash; @@ -15,444 +11,158 @@ use near_primitives::sharding::PartialEncodedChunk; use near_primitives::transaction::SignedTransaction; use near_primitives::types::{AccountId, EpochId, ShardId}; use near_primitives::views::FinalExecutionOutcomeView; -use tracing::Instrument; /// A strongly typed asynchronous API for the Client logic. /// It abstracts away the fact that client is implemented using actix /// actors. -/// TODO(gprusak): eventually we might want to replace this concrete -/// implementation with an (async) trait, and move the -/// concrete implementation to the near_client crate. This way we will -/// be able to remove actix from the near_network crate entirely. -pub struct Client { - /// Address of the client actor. - client_addr: actix::Recipient>, - /// Address of the view client actor. 
- view_client_addr: actix::Recipient>, -} - -impl Client { - pub fn new( - client_addr: actix::Recipient>, - view_client_addr: actix::Recipient>, - ) -> Self { - Self { client_addr, view_client_addr } - } - - pub async fn tx_status_request( +#[async_trait::async_trait] +pub trait Client: Send + Sync + 'static { + async fn tx_status_request( &self, account_id: AccountId, tx_hash: CryptoHash, - ) -> Result, ReasonForBan> { - let _span = tracing::trace_span!(target: "network", "tx_status_request").entered(); - match self - .view_client_addr - .send( - NetworkViewClientMessages::TxStatus { - tx_hash: tx_hash, - signer_account_id: account_id, - } - .with_span_context(), - ) - .in_current_span() - .await - { - Ok(NetworkViewClientResponses::TxStatus(tx_result)) => Ok(Some(*tx_result)), - Ok(NetworkViewClientResponses::Ban { ban_reason }) => Err(ban_reason), - Ok(resp) => panic!("unexpected ViewClientResponse: {resp:?}"), - Err(err) => { - tracing::error!("mailbox error: {err}"); - Ok(None) - } - } - } + ) -> Option>; - pub async fn tx_status_response( - &self, - tx_result: FinalExecutionOutcomeView, - ) -> Result<(), ReasonForBan> { - let _span = tracing::trace_span!(target: "network", "tx_status_response").entered(); - match self - .view_client_addr - .send( - NetworkViewClientMessages::TxStatusResponse(Box::new(tx_result.clone())) - .with_span_context(), - ) - .in_current_span() - .await - { - Ok(NetworkViewClientResponses::NoResponse) => Ok(()), - Ok(NetworkViewClientResponses::Ban { ban_reason }) => Err(ban_reason), - Ok(resp) => panic!("unexpected ViewClientResponse: {resp:?}"), - Err(err) => { - tracing::error!("mailbox error: {err}"); - Ok(()) - } - } - } + async fn tx_status_response(&self, tx_result: FinalExecutionOutcomeView); - pub async fn state_request_header( + async fn state_request_header( &self, shard_id: ShardId, sync_hash: CryptoHash, - ) -> Result, ReasonForBan> { - let _span = tracing::trace_span!(target: "network", "state_request_header").entered(); 
- match self - .view_client_addr - .send( - NetworkViewClientMessages::StateRequestHeader { - shard_id: shard_id, - sync_hash: sync_hash, - } - .with_span_context(), - ) - .in_current_span() - .await - { - Ok(NetworkViewClientResponses::StateResponse(resp)) => Ok(Some(*resp)), - Ok(NetworkViewClientResponses::NoResponse) => Ok(None), - Ok(NetworkViewClientResponses::Ban { ban_reason }) => Err(ban_reason), - Ok(resp) => panic!("unexpected ViewClientResponse: {resp:?}"), - Err(err) => { - tracing::error!("mailbox error: {err}"); - Ok(None) - } - } - } + ) -> Result, ReasonForBan>; - pub async fn state_request_part( + async fn state_request_part( &self, shard_id: ShardId, sync_hash: CryptoHash, part_id: u64, - ) -> Result, ReasonForBan> { - let _span = tracing::trace_span!(target: "network", "state_request_part").entered(); - match self - .view_client_addr - .send( - NetworkViewClientMessages::StateRequestPart { - shard_id: shard_id, - sync_hash: sync_hash, - part_id: part_id, - } - .with_span_context(), - ) - .in_current_span() - .await - { - Ok(NetworkViewClientResponses::StateResponse(resp)) => Ok(Some(*resp)), - Ok(NetworkViewClientResponses::NoResponse) => Ok(None), - Ok(NetworkViewClientResponses::Ban { ban_reason }) => Err(ban_reason), - Ok(resp) => panic!("unexpected ViewClientResponse: {resp:?}"), - Err(err) => { - tracing::error!("mailbox error: {err}"); - Ok(None) - } - } - } + ) -> Result, ReasonForBan>; - pub async fn state_response(&self, info: StateResponseInfo) -> Result<(), ReasonForBan> { - let _span = tracing::trace_span!(target: "network", "state_response").entered(); - match self - .client_addr - .send(NetworkClientMessages::StateResponse(info).with_span_context()) - .in_current_span() - .await - { - Ok(NetworkClientResponses::NoResponse) => Ok(()), - Ok(NetworkClientResponses::Ban { ban_reason }) => Err(ban_reason), - Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), - Err(err) => { - tracing::error!("mailbox error: {err}"); - Ok(()) - } 
- } - } + async fn state_response(&self, info: StateResponseInfo); - pub async fn block_approval( - &self, - approval: Approval, - peer_id: PeerId, - ) -> Result<(), ReasonForBan> { - let _span = tracing::trace_span!(target: "network", "block_approval").entered(); - match self - .client_addr - .send(NetworkClientMessages::BlockApproval(approval, peer_id).with_span_context()) - .in_current_span() - .await - { - Ok(NetworkClientResponses::NoResponse) => Ok(()), - Ok(NetworkClientResponses::Ban { ban_reason }) => Err(ban_reason), - Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), - Err(err) => { - tracing::error!("mailbox error: {err}"); - Ok(()) - } - } - } + async fn block_approval(&self, approval: Approval, peer_id: PeerId); - pub async fn transaction( - &self, - transaction: SignedTransaction, - is_forwarded: bool, - ) -> Result<(), ReasonForBan> { - let _span = tracing::trace_span!(target: "network", "transaction").entered(); - match self - .client_addr - .send( - NetworkClientMessages::Transaction { transaction, is_forwarded, check_only: false } - .with_span_context(), - ) - .in_current_span() - .await - { - Ok(NetworkClientResponses::ValidTx) => Ok(()), - Ok(NetworkClientResponses::InvalidTx(err)) => { - tracing::warn!(target: "network", ?err, "Received invalid tx"); - // TODO: count as malicious behavior? 
- Ok(()) - } - Ok(NetworkClientResponses::Ban { ban_reason }) => Err(ban_reason), - Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), - Err(err) => { - tracing::error!("mailbox error: {err}"); - Ok(()) - } - } - } + async fn transaction(&self, transaction: SignedTransaction, is_forwarded: bool); - pub async fn partial_encoded_chunk_request( + async fn partial_encoded_chunk_request( &self, req: PartialEncodedChunkRequestMsg, msg_hash: CryptoHash, - ) -> Result<(), ReasonForBan> { - let _span = - tracing::trace_span!(target: "network", "partial_encoded_chunk_request").entered(); - match self - .client_addr - .send( - NetworkClientMessages::PartialEncodedChunkRequest(req, msg_hash) - .with_span_context(), - ) - .in_current_span() - .await - { - Ok(NetworkClientResponses::NoResponse) => Ok(()), - Ok(NetworkClientResponses::Ban { ban_reason }) => Err(ban_reason), - Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), - Err(err) => { - tracing::error!("mailbox error: {err}"); - Ok(()) - } - } - } + ); - pub async fn partial_encoded_chunk_response( + async fn partial_encoded_chunk_response( &self, resp: PartialEncodedChunkResponseMsg, timestamp: time::Instant, - ) -> Result<(), ReasonForBan> { - let _span = - tracing::trace_span!(target: "network", "partial_encoded_chunk_response").entered(); - match self - .client_addr - .send( - NetworkClientMessages::PartialEncodedChunkResponse(resp, timestamp.into()) - .with_span_context(), - ) - .in_current_span() - .await - { - Ok(NetworkClientResponses::NoResponse) => Ok(()), - Ok(NetworkClientResponses::Ban { ban_reason }) => Err(ban_reason), - Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), - Err(err) => { - tracing::error!("mailbox error: {err}"); - Ok(()) - } - } - } + ); + + async fn partial_encoded_chunk(&self, chunk: PartialEncodedChunk); + + async fn partial_encoded_chunk_forward(&self, msg: PartialEncodedChunkForwardMsg); + + async fn block_request(&self, hash: CryptoHash) -> Option>; - pub async 
fn partial_encoded_chunk( + async fn block_headers_request(&self, hashes: Vec) -> Option>; + + async fn block(&self, block: Block, peer_id: PeerId, was_requested: bool); + + async fn block_headers( &self, - chunk: PartialEncodedChunk, - ) -> Result<(), ReasonForBan> { - let _span = tracing::trace_span!(target: "network", "partial_encoded_chunk").entered(); - match self - .client_addr - .send(NetworkClientMessages::PartialEncodedChunk(chunk).with_span_context()) - .in_current_span() - .await - { - Ok(NetworkClientResponses::NoResponse) => Ok(()), - Ok(NetworkClientResponses::Ban { ban_reason }) => Err(ban_reason), - Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), - Err(err) => { - tracing::error!("mailbox error: {err}"); - Ok(()) - } - } - } + headers: Vec, + peer_id: PeerId, + ) -> Result<(), ReasonForBan>; + + async fn challenge(&self, challenge: Challenge); + + async fn network_info(&self, info: NetworkInfo); - pub async fn partial_encoded_chunk_forward( + async fn announce_account( &self, - msg: PartialEncodedChunkForwardMsg, - ) -> Result<(), ReasonForBan> { - let _span = - tracing::trace_span!(target: "network", "partial_encoded_chunk_forward").entered(); - match self - .client_addr - .send(NetworkClientMessages::PartialEncodedChunkForward(msg).with_span_context()) - .in_current_span() - .await - { - Ok(NetworkClientResponses::NoResponse) => Ok(()), - Ok(NetworkClientResponses::Ban { ban_reason }) => Err(ban_reason), - Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), - Err(err) => { - tracing::error!("mailbox error: {err}"); - Ok(()) - } - } + accounts: Vec<(AnnounceAccount, Option)>, + ) -> Result, ReasonForBan>; +} + +/// Implementation of Client which doesn't do anything and never returns errors. 
+pub struct Noop; + +#[async_trait::async_trait] +impl Client for Noop { + async fn tx_status_request( + &self, + _account_id: AccountId, + _tx_hash: CryptoHash, + ) -> Option> { + None } - pub async fn block_request(&self, hash: CryptoHash) -> Result, ReasonForBan> { - let _span = tracing::trace_span!(target: "network", "block_request").entered(); - match self - .view_client_addr - .send(NetworkViewClientMessages::BlockRequest(hash).with_span_context()) - .in_current_span() - .await - { - Ok(NetworkViewClientResponses::Block(block)) => Ok(Some(*block)), - Ok(NetworkViewClientResponses::NoResponse) => Ok(None), - Ok(NetworkViewClientResponses::Ban { ban_reason }) => Err(ban_reason), - Ok(resp) => panic!("unexpected ViewClientResponse: {resp:?}"), - Err(err) => { - tracing::error!("mailbox error: {err}"); - Ok(None) - } - } + async fn tx_status_response(&self, _tx_result: FinalExecutionOutcomeView) {} + + async fn state_request_header( + &self, + _shard_id: ShardId, + _sync_hash: CryptoHash, + ) -> Result, ReasonForBan> { + Ok(None) } - pub async fn block_headers_request( + async fn state_request_part( &self, - hashes: Vec, - ) -> Result>, ReasonForBan> { - let _span = tracing::trace_span!(target: "network", "block_headers_request").entered(); - match self - .view_client_addr - .send(NetworkViewClientMessages::BlockHeadersRequest(hashes).with_span_context()) - .in_current_span() - .await - { - Ok(NetworkViewClientResponses::BlockHeaders(block_headers)) => Ok(Some(block_headers)), - Ok(NetworkViewClientResponses::NoResponse) => Ok(None), - Ok(NetworkViewClientResponses::Ban { ban_reason }) => Err(ban_reason), - Ok(resp) => panic!("unexpected ViewClientResponse: {resp:?}"), - Err(err) => { - tracing::error!("mailbox error: {err}"); - Ok(None) - } - } + _shard_id: ShardId, + _sync_hash: CryptoHash, + _part_id: u64, + ) -> Result, ReasonForBan> { + Ok(None) } - pub async fn block( + async fn state_response(&self, _info: StateResponseInfo) {} + async fn 
block_approval(&self, _approval: Approval, _peer_id: PeerId) {} + + async fn transaction(&self, _transaction: SignedTransaction, _is_forwarded: bool) {} + + async fn partial_encoded_chunk_request( &self, - block: Block, - peer_id: PeerId, - was_requested: bool, - ) -> Result<(), ReasonForBan> { - let _span = tracing::trace_span!(target: "network", "block").entered(); - match self - .client_addr - .send(NetworkClientMessages::Block(block, peer_id, was_requested).with_span_context()) - .in_current_span() - .await - { - Ok(NetworkClientResponses::NoResponse) => Ok(()), - Ok(NetworkClientResponses::Ban { ban_reason }) => Err(ban_reason), - Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), - Err(err) => { - tracing::error!("mailbox error: {err}"); - Ok(()) - } - } + _req: PartialEncodedChunkRequestMsg, + _msg_hash: CryptoHash, + ) { } - pub async fn block_headers( + async fn partial_encoded_chunk_response( &self, - headers: Vec, - peer_id: PeerId, - ) -> Result<(), ReasonForBan> { - let _span = tracing::trace_span!(target: "network", "block_headers").entered(); - match self - .client_addr - .send(NetworkClientMessages::BlockHeaders(headers, peer_id).with_span_context()) - .in_current_span() - .await - { - Ok(NetworkClientResponses::NoResponse) => Ok(()), - Ok(NetworkClientResponses::Ban { ban_reason }) => Err(ban_reason), - Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), - Err(err) => { - tracing::error!("mailbox error: {err}"); - Ok(()) - } - } + _resp: PartialEncodedChunkResponseMsg, + _timestamp: time::Instant, + ) { } - pub async fn challenge(&self, challenge: Challenge) -> Result<(), ReasonForBan> { - let _span = tracing::trace_span!(target: "network", "challenge").entered(); - match self - .client_addr - .send(NetworkClientMessages::Challenge(challenge).with_span_context()) - .in_current_span() - .await - { - Ok(NetworkClientResponses::NoResponse) => Ok(()), - Ok(NetworkClientResponses::Ban { ban_reason }) => Err(ban_reason), - Ok(resp) => 
panic!("unexpected ClientResponse: {resp:?}"), - Err(err) => { - tracing::error!("mailbox error: {err}"); - Ok(()) - } - } + async fn partial_encoded_chunk(&self, _chunk: PartialEncodedChunk) {} + + async fn partial_encoded_chunk_forward(&self, _msg: PartialEncodedChunkForwardMsg) {} + + async fn block_request(&self, _hash: CryptoHash) -> Option> { + None } - pub async fn network_info(&self, info: NetworkInfo) { - let _span = tracing::trace_span!(target: "network", "network_info").entered(); - match self - .client_addr - .send(NetworkClientMessages::NetworkInfo(info).with_span_context()) - .in_current_span() - .await - { - Ok(NetworkClientResponses::NoResponse) => {} - Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), - Err(err) => tracing::error!("mailbox error: {err}"), - } + async fn block_headers_request(&self, _hashes: Vec) -> Option> { + None } - pub async fn announce_account( + async fn block(&self, _block: Block, _peer_id: PeerId, _was_requested: bool) {} + + async fn block_headers( &self, - accounts: Vec<(AnnounceAccount, Option)>, + _headers: Vec, + _peer_id: PeerId, + ) -> Result<(), ReasonForBan> { + Ok(()) + } + + async fn challenge(&self, _challenge: Challenge) {} + + async fn network_info(&self, _info: NetworkInfo) {} + + async fn announce_account( + &self, + _accounts: Vec<(AnnounceAccount, Option)>, ) -> Result, ReasonForBan> { - let _span = tracing::trace_span!(target: "network", "announce_account").entered(); - match self - .view_client_addr - .send(NetworkViewClientMessages::AnnounceAccount(accounts).with_span_context()) - .in_current_span() - .await - { - Ok(NetworkViewClientResponses::AnnounceAccount(accounts)) => Ok(accounts), - Ok(NetworkViewClientResponses::NoResponse) => Ok(vec![]), - Ok(NetworkViewClientResponses::Ban { ban_reason }) => Err(ban_reason), - Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), - Err(err) => { - tracing::error!("mailbox error: {err}"); - Ok(vec![]) - } - } + Ok(vec![]) } } diff --git 
a/chain/network/src/peer/peer_actor.rs b/chain/network/src/peer/peer_actor.rs index f5b87a2f5ac..bfd066d6c2e 100644 --- a/chain/network/src/peer/peer_actor.rs +++ b/chain/network/src/peer/peer_actor.rs @@ -37,14 +37,13 @@ use near_primitives::utils::DisplayOption; use near_primitives::version::{ ProtocolVersion, PEER_MIN_ALLOWED_PROTOCOL_VERSION, PROTOCOL_VERSION, }; -use opentelemetry::ContextGuard; use parking_lot::Mutex; use std::fmt::Debug; use std::io; use std::net::SocketAddr; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; -use tracing::{debug, error, info, warn, Instrument}; +use tracing::{debug, error, info, warn}; /// Maximum number of messages per minute from single peer. // TODO(#5453): current limit is way to high due to us sending lots of messages during sync. @@ -274,24 +273,6 @@ impl PeerActor { return PeerMessage::deserialize(Encoding::Borsh, msg); } - fn parse_message_with_remote_context( - &mut self, - msg: &[u8], - ) -> Result<(PeerMessage, Option), ParsePeerMessageError> { - let _span = - tracing::trace_span!(target: "network", "parse_message_with_remote_context").entered(); - if let Some(e) = self.encoding() { - return PeerMessage::deserialize_with_remote_context(e, msg); - } - if let Ok((msg, guard)) = PeerMessage::deserialize_with_remote_context(Encoding::Proto, msg) - { - tracing::warn!("deserialized, has_guard: {}", guard.is_some()); - self.protocol_buffers_supported = true; - return Ok((msg, guard)); - } - return PeerMessage::deserialize_with_remote_context(Encoding::Borsh, msg); - } - fn send_message_or_log(&self, msg: &PeerMessage) { self.send_message(msg); } @@ -547,7 +528,7 @@ impl PeerActor { let conn = conn.clone(); wrap_future(async move { loop { - interval.tick().in_current_span().await; + interval.tick().await; let sent = tracker.lock().sent_bytes.minute_stats(&clock); let received = tracker.lock().received_bytes.minute_stats(&clock); conn.stats @@ -719,68 +700,52 @@ impl PeerActor { msg_hash: CryptoHash, body: 
RoutedMessageBody, ) -> Result, ReasonForBan> { - let _span = tracing::warn_span!(target: "network", "receive_routed_message").entered(); Ok(match body { RoutedMessageBody::TxStatusRequest(account_id, tx_hash) => network_state .client .tx_status_request(account_id, tx_hash) - .in_current_span() - .await? - .map(RoutedMessageBody::TxStatusResponse), + .await + .map(|v| RoutedMessageBody::TxStatusResponse(*v)), RoutedMessageBody::TxStatusResponse(tx_result) => { - network_state.client.tx_status_response(tx_result).in_current_span().await?; + network_state.client.tx_status_response(tx_result).await; None } RoutedMessageBody::StateRequestHeader(shard_id, sync_hash) => network_state .client .state_request_header(shard_id, sync_hash) - .in_current_span() .await? .map(RoutedMessageBody::VersionedStateResponse), RoutedMessageBody::StateRequestPart(shard_id, sync_hash, part_id) => network_state .client .state_request_part(shard_id, sync_hash, part_id) - .in_current_span() .await? .map(RoutedMessageBody::VersionedStateResponse), RoutedMessageBody::VersionedStateResponse(info) => { - network_state.client.state_response(info).in_current_span().await?; + network_state.client.state_response(info).await; None } RoutedMessageBody::BlockApproval(approval) => { - network_state.client.block_approval(approval, peer_id).in_current_span().await?; + network_state.client.block_approval(approval, peer_id).await; None } RoutedMessageBody::ForwardTx(transaction) => { - network_state - .client - .transaction(transaction, /*is_forwarded=*/ true) - .in_current_span() - .await?; + network_state.client.transaction(transaction, /*is_forwarded=*/ true).await; None } RoutedMessageBody::PartialEncodedChunkRequest(request) => { - network_state - .client - .partial_encoded_chunk_request(request, msg_hash) - .in_current_span() - .await?; + network_state.client.partial_encoded_chunk_request(request, msg_hash).await; None } RoutedMessageBody::PartialEncodedChunkResponse(response) => { - network_state - 
.client - .partial_encoded_chunk_response(response, clock.now()) - .in_current_span() - .await?; + network_state.client.partial_encoded_chunk_response(response, clock.now()).await; None } RoutedMessageBody::VersionedPartialEncodedChunk(chunk) => { - network_state.client.partial_encoded_chunk(chunk).in_current_span().await?; + network_state.client.partial_encoded_chunk(chunk).await; None } RoutedMessageBody::PartialEncodedChunkForward(msg) => { - network_state.client.partial_encoded_chunk_forward(msg).in_current_span().await?; + network_state.client.partial_encoded_chunk_forward(msg).await; None } RoutedMessageBody::ReceiptOutcomeRequest(_) => { @@ -803,8 +768,6 @@ impl PeerActor { conn: &connection::Connection, msg: PeerMessage, ) { - let span = tracing::trace_span!( target: "network", "receive_message"); - let span_guard = span.enter(); // This is a fancy way to clone the message iff event_sink is non-null. // If you have a better idea on how to achieve that, feel free to improve this. 
let message_processed_event = self @@ -825,16 +788,11 @@ impl PeerActor { let clock = self.clock.clone(); let network_state = self.network_state.clone(); let peer_id = conn.peer_info.id.clone(); - drop(span_guard); ctx.spawn(wrap_future(async move { - let _span = tracing::warn_span!(target: "network", "receive_routed_message_span").entered(); - tracing::warn!("receive_routed_message !1"); Ok(match msg { PeerMessage::Routed(msg) => { let msg_hash = msg.hash(); - Self::receive_routed_message(&clock, &network_state, peer_id, msg_hash, msg.msg.body) - .in_current_span() - .await?.map( + Self::receive_routed_message(&clock, &network_state, peer_id, msg_hash, msg.msg.body).await?.map( |body| { PeerMessage::Routed(network_state.sign_message( &clock, @@ -844,46 +802,33 @@ impl PeerActor { ) } PeerMessage::BlockRequest(hash) => { - network_state.client.block_request(hash) - .in_current_span() - .await?.map(PeerMessage::Block) + network_state.client.block_request(hash).await.map(|b|PeerMessage::Block(*b)) } PeerMessage::BlockHeadersRequest(hashes) => { - network_state.client.block_headers_request(hashes) - .in_current_span() - .await?.map(PeerMessage::BlockHeaders) + network_state.client.block_headers_request(hashes).await.map(PeerMessage::BlockHeaders) } PeerMessage::Block(block) => { - network_state.client.block(block, peer_id, was_requested) - .in_current_span() - .await?; + network_state.client.block(block, peer_id, was_requested).await; None } PeerMessage::Transaction(transaction) => { - network_state.client.transaction(transaction, /*is_forwarded=*/ false) - .in_current_span() - .await?; + network_state.client.transaction(transaction, /*is_forwarded=*/ false).await; None } PeerMessage::BlockHeaders(headers) => { - network_state.client.block_headers(headers, peer_id) - .in_current_span() - .await?; + network_state.client.block_headers(headers, peer_id).await?; None } PeerMessage::Challenge(challenge) => { - network_state.client.challenge(challenge) - .in_current_span() 
- .await?; + network_state.client.challenge(challenge).await; None } msg => { tracing::error!(target: "network", "Peer received unexpected type: {:?}", msg); None } - })}.in_current_span()) + })}) .map(|res, act: &mut PeerActor, ctx| { - tracing::warn!("receive_routed_message !2"); match res { // TODO(gprusak): make sure that for routed messages we drop routeback info correctly. Ok(Some(resp)) => act.send_message_or_log(&resp), @@ -901,11 +846,6 @@ impl PeerActor { conn: &connection::Connection, peer_msg: PeerMessage, ) { - let _span = tracing::trace_span!( - target: "network", - "handle_msg_ready") - .entered(); - match peer_msg.clone() { PeerMessage::Disconnect => { debug!(target: "network", "Disconnect signal. Me: {:?} Peer: {:?}", self.my_node_info.id, self.other_peer_id()); @@ -1004,12 +944,8 @@ impl PeerActor { return None; } // Verify and add the new data to the internal state. - let (new_data, err) = pms - .accounts_data - .clone() - .insert(msg.accounts_data) - .in_current_span() - .await; + let (new_data, err) = + pms.accounts_data.clone().insert(msg.accounts_data).await; // Broadcast any new data we have found, even in presence of an error. // This will prevent a malicious peer from forcing us to re-verify valid // datasets. See accounts_data::Cache documentation for details. 
@@ -1023,7 +959,7 @@ impl PeerActor { .filter(|p| peer_id != p.peer_info.id) .map(|p| p.send_accounts_data(new_data.clone())) .collect(); - futures_util::future::join_all(handles).in_current_span().await; + futures_util::future::join_all(handles).await; } err.map(|err| match err { accounts_data::Error::InvalidSignature => { @@ -1133,16 +1069,15 @@ impl PeerActor { self.network_state .validate_edges_and_add_to_routing_table(conn.peer_info.id.clone(), edges); ctx.spawn( - wrap_future(async move { - network_state.client.announce_account(accounts).in_current_span().await - }) - .then(move |res, act: &mut PeerActor, ctx| { - match res { - Err(ban_reason) => act.stop(ctx, ClosingReason::Ban(ban_reason)), - Ok(accounts) => act.network_state.broadcast_accounts(accounts), - } - wrap_future(async {}) - }), + wrap_future(async move { network_state.client.announce_account(accounts).await }).then( + move |res, act: &mut PeerActor, ctx| { + match res { + Err(ban_reason) => act.stop(ctx, ClosingReason::Ban(ban_reason)), + Ok(accounts) => act.network_state.broadcast_accounts(accounts), + } + wrap_future(async {}) + }, + ), ); } } @@ -1261,12 +1196,7 @@ impl actix::Handler for PeerActor { type Result = (); #[perf] fn handle(&mut self, stream::Frame(msg): stream::Frame, ctx: &mut Self::Context) { - let _span = tracing::trace_span!( - target: "network", - "handle", - handler = "bytes", - actor = "PeerActor") - .entered(); + let _span = tracing::trace_span!(target: "network", "handle", handler = "bytes").entered(); // TODO(#5155) We should change our code to track size of messages received from Peer // as long as it travels to PeerManager, etc. 
@@ -1276,22 +1206,13 @@ impl actix::Handler for PeerActor { } self.update_stats_on_receiving_message(msg.len()); - let (mut peer_msg, guard) = match self.parse_message_with_remote_context(&msg) { - Ok((msg, guard)) => (msg, guard), + let mut peer_msg = match self.parse_message(&msg) { + Ok(msg) => msg, Err(err) => { debug!(target: "network", "Received invalid data {} from {}: {}", pretty::AbbrBytes(&msg), self.peer_info, err); return; } }; - tracing::warn!("parsed, has_guard: {}", guard.is_some()); - - let _span2 = tracing::trace_span!( - target: "network", - "handle-with-guard", - handler = "bytes", - has_guard = (guard.is_some()), - actor = "PeerActor") - .entered(); match &peer_msg { PeerMessage::Routed(msg) => { diff --git a/chain/network/src/peer/testonly.rs b/chain/network/src/peer/testonly.rs index 17e76409d41..9d0cd8a0dfc 100644 --- a/chain/network/src/peer/testonly.rs +++ b/chain/network/src/peer/testonly.rs @@ -1,5 +1,4 @@ use crate::broadcast; -use crate::client; use crate::config::NetworkConfig; use crate::network_protocol::testonly as data; use crate::network_protocol::{ @@ -161,7 +160,7 @@ impl PeerHandle { let (send, recv) = broadcast::unbounded_channel(); let actix = ActixSystem::spawn(move || { let fpm = FakePeerManagerActor { cfg: cfg.clone() }.start(); - let fc = fake_client::start(send.sink().compose(Event::Client)); + let fc = Arc::new(fake_client::Fake { event_sink: send.sink().compose(Event::Client) }); let store = store::Store::from(near_store::db::TestDB::new()); let routing_table_view = RoutingTableView::new(store.clone(), cfg.id()); // WARNING: this is a hack to make PeerActor use a specific nonce @@ -185,7 +184,7 @@ impl PeerHandle { let network_state = Arc::new(NetworkState::new( Arc::new(network_cfg.verify().unwrap()), cfg.chain.genesis_id.clone(), - client::Client::new(fc.clone().recipient(), fc.clone().recipient()), + fc, fpm.recipient(), routing_table_addr, routing_table_view, diff --git 
a/chain/network/src/peer_manager/network_state.rs b/chain/network/src/peer_manager/network_state.rs index 37c9c463895..430bcdf4ebe 100644 --- a/chain/network/src/peer_manager/network_state.rs +++ b/chain/network/src/peer_manager/network_state.rs @@ -36,7 +36,7 @@ pub(crate) struct NetworkState { pub config: Arc, /// GenesisId of the chain. pub genesis_id: GenesisId, - pub client: client::Client, + pub client: Arc, /// Address of the peer manager actor. pub peer_manager_addr: Recipient>, /// RoutingTableActor, responsible for computing routing table, routing table exchange, etc. @@ -69,7 +69,7 @@ impl NetworkState { pub fn new( config: Arc, genesis_id: GenesisId, - client: client::Client, + client: Arc, peer_manager_addr: Recipient>, routing_table_addr: actix::Addr, routing_table_view: RoutingTableView, diff --git a/chain/network/src/peer_manager/peer_manager_actor.rs b/chain/network/src/peer_manager/peer_manager_actor.rs index b59e6135905..1d6e4e43c6c 100644 --- a/chain/network/src/peer_manager/peer_manager_actor.rs +++ b/chain/network/src/peer_manager/peer_manager_actor.rs @@ -263,7 +263,7 @@ impl PeerManagerActor { clock: time::Clock, store: Arc, config: config::NetworkConfig, - client: client::Client, + client: Arc, genesis_id: GenesisId, ) -> anyhow::Result> { let config = config.verify().context("config")?; diff --git a/chain/network/src/peer_manager/testonly.rs b/chain/network/src/peer_manager/testonly.rs index fa955b4cb66..4652a48a26a 100644 --- a/chain/network/src/peer_manager/testonly.rs +++ b/chain/network/src/peer_manager/testonly.rs @@ -1,5 +1,4 @@ use crate::broadcast; -use crate::client; use crate::config; use crate::network_protocol::testonly as data; use crate::network_protocol::{ @@ -287,16 +286,9 @@ pub(crate) async fn start( let chain = chain.clone(); move || { let genesis_id = chain.genesis_id.clone(); - let fc = fake_client::start(send.sink().compose(Event::Client)); + let fc = Arc::new(fake_client::Fake { event_sink: 
send.sink().compose(Event::Client) }); cfg.event_sink = send.sink().compose(Event::PeerManager); - PeerManagerActor::spawn( - clock, - store, - cfg, - client::Client::new(fc.clone().recipient(), fc.clone().recipient()), - genesis_id, - ) - .unwrap() + PeerManagerActor::spawn(clock, store, cfg, fc, genesis_id).unwrap() } }) .await; diff --git a/chain/network/src/test_utils.rs b/chain/network/src/test_utils.rs index 22b5089d60c..c713c6809a2 100644 --- a/chain/network/src/test_utils.rs +++ b/chain/network/src/test_utils.rs @@ -322,67 +322,6 @@ impl MockPeerManagerAdapter { } } -pub mod test_features { - use crate::client; - use crate::config; - use crate::test_utils::convert_boot_nodes; - use crate::time; - use crate::types::{NetworkClientMessages, NetworkClientResponses}; - use crate::types::{NetworkViewClientMessages, NetworkViewClientResponses}; - use crate::PeerManagerActor; - use actix::actors::mocker::Mocker; - use actix::Actor; - use near_primitives::block::GenesisId; - use std::sync::atomic::{AtomicUsize, Ordering}; - use std::sync::Arc; - - /// Mock for `ClientActor` - type ClientMock = Mocker; - /// Mock for `ViewClientActor` - type ViewClientMock = Mocker; - - // Make peer manager for unit tests - pub fn spawn_peer_manager( - store: Arc, - mut config: config::NetworkConfig, - boot_nodes: Vec<(&str, u16)>, - peer_max_count: u32, - ) -> actix::Addr { - config.boot_nodes = convert_boot_nodes(boot_nodes); - config.max_num_peers = peer_max_count; - let counter = Arc::new(AtomicUsize::new(0)); - let counter1 = counter.clone(); - let client_addr = ClientMock::mock(Box::new(move |_msg, _ctx| { - Box::new(Some(NetworkClientResponses::NoResponse)) - })) - .start(); - - let view_client_addr = ViewClientMock::mock(Box::new(move |msg, _ctx| { - let msg = msg.downcast_ref::().unwrap(); - match msg { - NetworkViewClientMessages::AnnounceAccount(accounts) => { - if !accounts.is_empty() { - counter1.fetch_add(1, Ordering::SeqCst); - } - 
Box::new(Some(NetworkViewClientResponses::AnnounceAccount( - accounts.clone().into_iter().map(|obj| obj.0).collect(), - ))) - } - _ => Box::new(Some(NetworkViewClientResponses::NoResponse)), - } - })) - .start(); - PeerManagerActor::spawn( - time::Clock::real(), - store, - config, - client::Client::new(client_addr.recipient(), view_client_addr.recipient()), - GenesisId::default(), - ) - .unwrap() - } -} - #[derive(Message, Clone, Debug)] #[rtype(result = "()")] pub struct SetAdvOptions { diff --git a/chain/network/src/testonly/fake_client.rs b/chain/network/src/testonly/fake_client.rs index 7aca62f78a7..e515dfa7937 100644 --- a/chain/network/src/testonly/fake_client.rs +++ b/chain/network/src/testonly/fake_client.rs @@ -1,15 +1,18 @@ +use crate::client; +use crate::network_protocol::{ + PartialEncodedChunkForwardMsg, PartialEncodedChunkRequestMsg, PartialEncodedChunkResponseMsg, + StateResponseInfo, +}; use crate::sink::Sink; -use crate::types::{NetworkClientMessages, NetworkClientResponses}; -use crate::types::{NetworkViewClientMessages, NetworkViewClientResponses}; -use actix::Actor as _; -use near_o11y::WithSpanContext; -use near_primitives::block::{Block, BlockHeader}; +use crate::types::{NetworkInfo, ReasonForBan}; +use near_primitives::block::{Approval, Block, BlockHeader}; use near_primitives::challenge::Challenge; use near_primitives::hash::CryptoHash; -use near_primitives::network::AnnounceAccount; -use near_primitives::sharding::{ChunkHash, PartialEncodedChunkPart}; +use near_primitives::network::{AnnounceAccount, PeerId}; +use near_primitives::sharding::{ChunkHash, PartialEncodedChunk, PartialEncodedChunkPart}; use near_primitives::transaction::SignedTransaction; -use near_primitives::types::EpochId; +use near_primitives::types::{AccountId, EpochId, ShardId}; +use near_primitives::views::FinalExecutionOutcomeView; #[derive(Debug, PartialEq, Eq, Clone)] pub enum Event { @@ -24,79 +27,109 @@ pub enum Event { AnnounceAccount(Vec<(AnnounceAccount, Option)>), 
} -pub struct Actor { - event_sink: Sink, +pub(crate) struct Fake { + pub event_sink: Sink, } -impl actix::Actor for Actor { - type Context = actix::Context; -} +#[async_trait::async_trait] +impl client::Client for Fake { + async fn tx_status_request( + &self, + _account_id: AccountId, + _tx_hash: CryptoHash, + ) -> Option> { + unimplemented!(); + } -pub fn start(event_sink: Sink) -> actix::Addr { - Actor { event_sink }.start() -} + async fn tx_status_response(&self, _tx_result: FinalExecutionOutcomeView) {} -impl actix::Handler> for Actor { - type Result = NetworkViewClientResponses; - fn handle( - &mut self, - msg: WithSpanContext, - _ctx: &mut Self::Context, - ) -> Self::Result { - let msg = msg.msg; - match msg { - NetworkViewClientMessages::BlockRequest(block_hash) => { - self.event_sink.push(Event::BlockRequest(block_hash)); - NetworkViewClientResponses::NoResponse - } - NetworkViewClientMessages::BlockHeadersRequest(req) => { - self.event_sink.push(Event::BlockHeadersRequest(req)); - NetworkViewClientResponses::NoResponse - } - NetworkViewClientMessages::AnnounceAccount(aas) => { - self.event_sink.push(Event::AnnounceAccount(aas.clone())); - NetworkViewClientResponses::AnnounceAccount(aas.into_iter().map(|a| a.0).collect()) - } - msg => { - let msg_type: &'static str = msg.into(); - panic!("unsupported message {msg_type}") - } - } + async fn state_request_header( + &self, + _shard_id: ShardId, + _sync_hash: CryptoHash, + ) -> Result, ReasonForBan> { + unimplemented!(); } -} -impl actix::Handler> for Actor { - type Result = NetworkClientResponses; - fn handle( - &mut self, - msg: WithSpanContext, - _ctx: &mut Self::Context, - ) -> Self::Result { - let msg = msg.msg; - - let mut resp = NetworkClientResponses::NoResponse; - match msg { - NetworkClientMessages::Block(b, _, _) => self.event_sink.push(Event::Block(b)), - NetworkClientMessages::BlockHeaders(bhs, _) => { - self.event_sink.push(Event::BlockHeaders(bhs)) - } - 
NetworkClientMessages::PartialEncodedChunkResponse(resp, _) => { - self.event_sink.push(Event::Chunk(resp.parts)) - } - NetworkClientMessages::PartialEncodedChunkRequest(req, _) => { - self.event_sink.push(Event::ChunkRequest(req.chunk_hash)) - } - NetworkClientMessages::Transaction { transaction, .. } => { - self.event_sink.push(Event::Transaction(transaction)); - resp = NetworkClientResponses::ValidTx; - } - NetworkClientMessages::Challenge(c) => self.event_sink.push(Event::Challenge(c)), - NetworkClientMessages::NetworkInfo(_) => {} - msg => { - let msg_type: &'static str = msg.into(); - panic!("unsupported message {msg_type}") - } - }; - resp + async fn state_request_part( + &self, + _shard_id: ShardId, + _sync_hash: CryptoHash, + _part_id: u64, + ) -> Result, ReasonForBan> { + unimplemented!(); + } + + async fn state_response(&self, _info: StateResponseInfo) { + unimplemented!(); + } + + async fn block_approval(&self, _approval: Approval, _peer_id: PeerId) { + unimplemented!(); + } + + async fn transaction(&self, transaction: SignedTransaction, _is_forwarded: bool) { + self.event_sink.push(Event::Transaction(transaction)); + } + + async fn partial_encoded_chunk_request( + &self, + req: PartialEncodedChunkRequestMsg, + _msg_hash: CryptoHash, + ) { + self.event_sink.push(Event::ChunkRequest(req.chunk_hash)); + } + + async fn partial_encoded_chunk_response( + &self, + resp: PartialEncodedChunkResponseMsg, + _timestamp: time::Instant, + ) { + self.event_sink.push(Event::Chunk(resp.parts)); + } + + async fn partial_encoded_chunk(&self, _chunk: PartialEncodedChunk) { + unimplemented!(); + } + + async fn partial_encoded_chunk_forward(&self, _msg: PartialEncodedChunkForwardMsg) { + unimplemented!(); + } + + async fn block_request(&self, hash: CryptoHash) -> Option> { + self.event_sink.push(Event::BlockRequest(hash)); + None + } + + async fn block_headers_request(&self, hashes: Vec) -> Option> { + self.event_sink.push(Event::BlockHeadersRequest(hashes)); + None + } + + 
async fn block(&self, block: Block, _peer_id: PeerId, _was_requested: bool) { + self.event_sink.push(Event::Block(block)); + } + + async fn block_headers( + &self, + headers: Vec, + _peer_id: PeerId, + ) -> Result<(), ReasonForBan> { + self.event_sink.push(Event::BlockHeaders(headers)); + Ok(()) + } + + async fn challenge(&self, challenge: Challenge) { + self.event_sink.push(Event::Challenge(challenge)); + } + + async fn network_info(&self, _info: NetworkInfo) {} + + async fn announce_account( + &self, + accounts: Vec<(AnnounceAccount, Option)>, + ) -> Result, ReasonForBan> { + self.event_sink.push(Event::AnnounceAccount(accounts.clone())); + Ok(accounts.into_iter().map(|a| a.0).collect()) } } diff --git a/integration-tests/src/tests/network/peer_handshake.rs b/integration-tests/src/tests/network/peer_handshake.rs index bb9b7dddaa7..22ef6c0fbdb 100644 --- a/integration-tests/src/tests/network/peer_handshake.rs +++ b/integration-tests/src/tests/network/peer_handshake.rs @@ -3,28 +3,21 @@ use near_network::time; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::Arc; -use actix::actors::mocker::Mocker; use actix::Actor; use actix::System; use futures::{future, FutureExt}; use near_primitives::block::GenesisId; use near_actix_test_utils::run_actix; -use near_client::{ClientActor, ViewClientActor}; use near_o11y::testonly::init_test_logger; use near_network::config; use near_network::test_utils::{ convert_boot_nodes, open_port, wait_or_timeout, GetInfo, StopSignal, WaitOrTimeoutActor, }; -use near_network::types::NetworkClientResponses; -use near_network::types::NetworkViewClientResponses; use near_network::PeerManagerActor; use near_o11y::WithSpanContextExt; -type ClientMock = Mocker; -type ViewClientMock = Mocker; - #[cfg(test)] fn make_peer_manager( seed: &str, @@ -37,20 +30,12 @@ fn make_peer_manager( config.max_num_peers = peer_max_count; config.ideal_connections_hi = peer_max_count; config.ideal_connections_lo = peer_max_count; - let 
client_addr = ClientMock::mock(Box::new(move |_msg, _ctx| { - Box::new(Some(NetworkClientResponses::NoResponse)) - })) - .start(); - let view_client_addr = ViewClientMock::mock(Box::new(|_msg, _ctx| { - Box::new(Some(NetworkViewClientResponses::NoResponse)) - })) - .start(); PeerManagerActor::spawn( time::Clock::real(), near_store::db::TestDB::new(), config, - near_network::client::Client::new(client_addr.recipient(), view_client_addr.recipient()), + Arc::new(near_network::client::Noop), GenesisId::default(), ) .unwrap() diff --git a/integration-tests/src/tests/network/runner.rs b/integration-tests/src/tests/network/runner.rs index 03765ac3149..2e1edaf6ea5 100644 --- a/integration-tests/src/tests/network/runner.rs +++ b/integration-tests/src/tests/network/runner.rs @@ -100,7 +100,7 @@ fn setup_network_node( time::Clock::real(), db.clone(), config, - near_network::client::Client::new(client_actor.recipient(), view_client_actor.recipient()), + Arc::new(near_client::adapter::Adapter::new(client_actor, view_client_actor)), genesis_id, ) .unwrap(); diff --git a/integration-tests/src/tests/network/stress_network.rs b/integration-tests/src/tests/network/stress_network.rs index 9d565094264..08ebb87daa5 100644 --- a/integration-tests/src/tests/network/stress_network.rs +++ b/integration-tests/src/tests/network/stress_network.rs @@ -2,13 +2,11 @@ use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::Arc; use std::time::Duration; -use actix::actors::mocker::Mocker; use actix::{Actor, AsyncContext, System}; use futures::FutureExt; use tracing::info; use near_actix_test_utils::run_actix; -use near_client::{ClientActor, ViewClientActor}; use near_network::time; use near_o11y::testonly::init_test_logger_allow_panic; use near_primitives::block::GenesisId; @@ -17,14 +15,9 @@ use near_network::config; use near_network::test_utils::{ convert_boot_nodes, open_port, GetInfo, StopSignal, WaitOrTimeoutActor, }; -use near_network::types::NetworkClientResponses; -use 
near_network::types::NetworkViewClientResponses; use near_network::PeerManagerActor; use near_o11y::WithSpanContextExt; -type ClientMock = Mocker; -type ViewClientMock = Mocker; - fn make_peer_manager( seed: &str, port: u16, @@ -32,19 +25,11 @@ fn make_peer_manager( ) -> actix::Addr { let mut config = config::NetworkConfig::from_seed(seed, port); config.boot_nodes = convert_boot_nodes(boot_nodes); - let client_addr = ClientMock::mock(Box::new(move |_msg, _ctx| { - Box::new(Some(NetworkClientResponses::NoResponse)) - })) - .start(); - let view_client_addr = ViewClientMock::mock(Box::new(|_msg, _ctx| { - Box::new(Some(NetworkViewClientResponses::NoResponse)) - })) - .start(); PeerManagerActor::spawn( time::Clock::real(), near_store::db::TestDB::new(), config, - near_network::client::Client::new(client_addr.recipient(), view_client_addr.recipient()), + Arc::new(near_network::client::Noop), GenesisId::default(), ) .unwrap() diff --git a/nearcore/src/lib.rs b/nearcore/src/lib.rs index 47f3f748673..fcde2590e5a 100644 --- a/nearcore/src/lib.rs +++ b/nearcore/src/lib.rs @@ -187,10 +187,7 @@ pub fn start_with_config_and_synchronization( time::Clock::real(), store.into_inner(near_store::Temperature::Hot), config.network_config, - near_network::client::Client::new( - client_actor.clone().recipient(), - view_client.clone().recipient(), - ), + Arc::new(near_client::adapter::Adapter::new(client_actor.clone(), view_client.clone())), genesis_id, ) .context("PeerManager::spawn()")?; diff --git a/tools/chainsync-loadtest/Cargo.toml b/tools/chainsync-loadtest/Cargo.toml index 41a4216fb6e..f4336985190 100644 --- a/tools/chainsync-loadtest/Cargo.toml +++ b/tools/chainsync-loadtest/Cargo.toml @@ -15,6 +15,7 @@ name = "chainsync-loadtest" [dependencies] actix.workspace = true anyhow.workspace = true +async-trait.workspace = true clap.workspace = true dirs.workspace = true futures.workspace = true diff --git a/tools/chainsync-loadtest/src/main.rs b/tools/chainsync-loadtest/src/main.rs 
index 00dbced8611..caee136068d 100644 --- a/tools/chainsync-loadtest/src/main.rs +++ b/tools/chainsync-loadtest/src/main.rs @@ -4,13 +4,12 @@ mod network; use std::sync::Arc; -use actix::{Actor, Arbiter}; use anyhow::{anyhow, Context}; use clap::Parser; use openssl_probe; use concurrency::{Ctx, Scope}; -use network::{FakeClientActor, Network}; +use network::Network; use near_chain_configs::Genesis; use near_network::time; @@ -38,19 +37,12 @@ fn genesis_hash(chain_id: &str) -> CryptoHash { pub fn start_with_config(config: NearConfig, qps_limit: u32) -> anyhow::Result> { let network_adapter = Arc::new(NetworkRecipient::default()); let network = Network::new(&config, network_adapter.clone(), qps_limit); - let client_actor = FakeClientActor::start_in_arbiter(&Arbiter::new().handle(), { - let network = network.clone(); - move |_| FakeClientActor::new(network) - }); let network_actor = PeerManagerActor::spawn( time::Clock::real(), near_store::db::TestDB::new(), config.network_config, - near_network::client::Client::new( - client_actor.clone().recipient(), - client_actor.clone().recipient(), - ), + network.clone(), GenesisId { chain_id: config.client_config.chain_id.clone(), hash: genesis_hash(&config.client_config.chain_id), diff --git a/tools/chainsync-loadtest/src/network.rs b/tools/chainsync-loadtest/src/network.rs index e77b19a13fc..923976a4c63 100644 --- a/tools/chainsync-loadtest/src/network.rs +++ b/tools/chainsync-loadtest/src/network.rs @@ -1,30 +1,31 @@ -use std::sync::atomic::{AtomicU64, Ordering}; - use crate::concurrency::{Ctx, Once, RateLimiter, Scope, WeakMap}; - +use log::info; +use near_network::time; use near_network::types::{ - AccountIdOrPeerTrackingShard, NetworkViewClientMessages, NetworkViewClientResponses, - PartialEncodedChunkRequestMsg, PartialEncodedChunkResponseMsg, + AccountIdOrPeerTrackingShard, PartialEncodedChunkForwardMsg, PartialEncodedChunkRequestMsg, + PartialEncodedChunkResponseMsg, ReasonForBan, StateResponseInfo, }; - -use 
actix::{Actor, Context, Handler}; -use log::info; use near_network::types::{ - FullPeerInfo, NetworkClientMessages, NetworkClientResponses, NetworkInfo, NetworkRequests, - PeerManagerAdapter, PeerManagerMessageRequest, + FullPeerInfo, NetworkInfo, NetworkRequests, PeerManagerAdapter, PeerManagerMessageRequest, }; -use near_o11y::{WithSpanContext, WithSpanContextExt}; -use near_primitives::block::{Block, BlockHeader}; +use near_o11y::WithSpanContextExt; +use near_primitives::block::{Approval, Block, BlockHeader}; +use near_primitives::challenge::Challenge; use near_primitives::hash::CryptoHash; -use near_primitives::sharding::{ChunkHash, ShardChunkHeader}; +use near_primitives::network::{AnnounceAccount, PeerId}; +use near_primitives::sharding::ShardChunkHeader; +use near_primitives::sharding::{ChunkHash, PartialEncodedChunk}; use near_primitives::time::Clock; +use near_primitives::transaction::SignedTransaction; +use near_primitives::types::{AccountId, EpochId, ShardId}; +use near_primitives::views::FinalExecutionOutcomeView; use nearcore::config::NearConfig; use rand::seq::SliceRandom; use rand::thread_rng; use std::future::Future; +use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, Mutex}; use tokio::sync::oneshot; -use tokio::time; #[derive(Default, Debug)] pub struct Stats { @@ -96,10 +97,10 @@ impl Network { min_peers: config.client_config.min_num_peers, parts_per_chunk: config.genesis.config.num_block_producer_seats, rate_limiter: RateLimiter::new( - time::Duration::from_secs(1) / qps_limit, + tokio::time::Duration::from_secs(1) / qps_limit, qps_limit as u64, ), - request_timeout: time::Duration::from_secs(2), + request_timeout: tokio::time::Duration::from_secs(2), }) } @@ -248,87 +249,104 @@ impl Network { }) .await } +} - fn notify(&self, msg: WithSpanContext) { - let msg = msg.msg; - self.stats.msgs_recv.fetch_add(1, Ordering::Relaxed); - match msg { - NetworkClientMessages::NetworkInfo(info) => { - let mut n = self.data.lock().unwrap(); - 
n.info_ = Arc::new(info); - if n.info_.num_connected_peers < self.min_peers { - info!("connected = {}/{}", n.info_.num_connected_peers, self.min_peers); - return; - } - for s in n.info_futures.split_off(0) { - s.send(n.info_.clone()).unwrap(); - } - } - NetworkClientMessages::Block(block, _, _) => { - self.blocks.get(&block.hash().clone()).map(|p| p.set(block)); - } - NetworkClientMessages::BlockHeaders(headers, _) => { - if let Some(h) = headers.iter().min_by_key(|h| h.height()) { - let hash = h.prev_hash().clone(); - self.block_headers.get(&hash).map(|p| p.set(headers)); - } - } - NetworkClientMessages::PartialEncodedChunkResponse(resp, _) => { - self.chunks.get(&resp.chunk_hash.clone()).map(|p| p.set(resp)); - } - _ => {} - } +#[async_trait::async_trait] +impl near_network::client::Client for Network { + async fn tx_status_request( + &self, + _account_id: AccountId, + _tx_hash: CryptoHash, + ) -> Option> { + None } -} -pub struct FakeClientActor { - network: Arc, -} + async fn tx_status_response(&self, _tx_result: FinalExecutionOutcomeView) {} -impl FakeClientActor { - pub fn new(network: Arc) -> Self { - FakeClientActor { network } + async fn state_request_header( + &self, + _shard_id: ShardId, + _sync_hash: CryptoHash, + ) -> Result, ReasonForBan> { + Ok(None) } -} -impl Actor for FakeClientActor { - type Context = Context; -} + async fn state_request_part( + &self, + _shard_id: ShardId, + _sync_hash: CryptoHash, + _part_id: u64, + ) -> Result, ReasonForBan> { + Ok(None) + } -impl Handler> for FakeClientActor { - type Result = NetworkViewClientResponses; - fn handle( - &mut self, - msg: WithSpanContext, - _ctx: &mut Self::Context, - ) -> Self::Result { - let msg = msg.msg; - let name = match msg { - NetworkViewClientMessages::TxStatus { .. 
} => "TxStatus", - NetworkViewClientMessages::TxStatusResponse(_) => "TxStatusResponse", - NetworkViewClientMessages::BlockRequest(_) => "BlockRequest", - NetworkViewClientMessages::BlockHeadersRequest(_) => "BlockHeadersRequest", - NetworkViewClientMessages::StateRequestHeader { .. } => "StateRequestHeader", - NetworkViewClientMessages::StateRequestPart { .. } => "StateRequestPart", - NetworkViewClientMessages::AnnounceAccount(_) => { - return NetworkViewClientResponses::NoResponse; - } - #[allow(unreachable_patterns)] - _ => "unknown", - }; - info!("view_request: {}", name); - return NetworkViewClientResponses::NoResponse; + async fn state_response(&self, _info: StateResponseInfo) {} + + async fn block_approval(&self, _approval: Approval, _peer_id: PeerId) {} + + async fn transaction(&self, _transaction: SignedTransaction, _is_forwarded: bool) {} + + async fn partial_encoded_chunk_request( + &self, + _req: PartialEncodedChunkRequestMsg, + _msg_hash: CryptoHash, + ) { + } + + async fn partial_encoded_chunk_response( + &self, + resp: PartialEncodedChunkResponseMsg, + _timestamp: time::Instant, + ) { + self.chunks.get(&resp.chunk_hash.clone()).map(|p| p.set(resp)); + } + + async fn partial_encoded_chunk(&self, _chunk: PartialEncodedChunk) {} + + async fn partial_encoded_chunk_forward(&self, _msg: PartialEncodedChunkForwardMsg) {} + + async fn block_request(&self, _hash: CryptoHash) -> Option> { + None + } + + async fn block_headers_request(&self, _hashes: Vec) -> Option> { + None + } + + async fn block(&self, block: Block, _peer_id: PeerId, _was_requested: bool) { + self.blocks.get(&block.hash().clone()).map(|p| p.set(block)); + } + + async fn block_headers( + &self, + headers: Vec, + _peer_id: PeerId, + ) -> Result<(), ReasonForBan> { + if let Some(h) = headers.iter().min_by_key(|h| h.height()) { + let hash = h.prev_hash().clone(); + self.block_headers.get(&hash).map(|p| p.set(headers)); + } + Ok(()) + } + + async fn challenge(&self, _challenge: Challenge) {} + + 
async fn network_info(&self, info: NetworkInfo) { + let mut n = self.data.lock().unwrap(); + n.info_ = Arc::new(info); + if n.info_.num_connected_peers < self.min_peers { + info!("connected = {}/{}", n.info_.num_connected_peers, self.min_peers); + return; + } + for s in n.info_futures.split_off(0) { + s.send(n.info_.clone()).unwrap(); + } } -} -impl Handler> for FakeClientActor { - type Result = NetworkClientResponses; - fn handle( - &mut self, - msg: WithSpanContext, - _ctx: &mut Context, - ) -> Self::Result { - self.network.notify(msg); - return NetworkClientResponses::NoResponse; + async fn announce_account( + &self, + accounts: Vec<(AnnounceAccount, Option)>, + ) -> Result, ReasonForBan> { + Ok(accounts.into_iter().map(|a| a.0).collect()) } } From 2a31f651e1d9040682846cfcbffd2c19ed4f686a Mon Sep 17 00:00:00 2001 From: Marcelo Diop-Gonzalez Date: Mon, 24 Oct 2022 14:48:42 -0400 Subject: [PATCH 015/103] mirror: make it easier to see what happened in debug logs (#7900) this will print more easy to understand info on which source chain transactions are making it into the target chain. 
for now we just log them to debug logs but it would be nice to have some HTTP debug page that shows an easy to understand summary --- tools/mirror/src/chain_tracker.rs | 189 ++++++++++++++++++++++++++---- tools/mirror/src/lib.rs | 154 +++++++++++++++++------- 2 files changed, 276 insertions(+), 67 deletions(-) diff --git a/tools/mirror/src/chain_tracker.rs b/tools/mirror/src/chain_tracker.rs index 40d7cfd8635..1fb399d6046 100644 --- a/tools/mirror/src/chain_tracker.rs +++ b/tools/mirror/src/chain_tracker.rs @@ -1,22 +1,73 @@ -use crate::MappedBlock; +use crate::{MappedBlock, MappedTx}; use near_crypto::PublicKey; use near_indexer::StreamerMessage; use near_indexer_primitives::IndexerTransactionWithOutcome; use near_primitives::hash::CryptoHash; -use near_primitives::transaction::SignedTransaction; use near_primitives::types::{AccountId, BlockHeight}; -use near_primitives_core::types::{Nonce, ShardId}; +use near_primitives_core::types::{Gas, Nonce, ShardId}; use std::cmp::Ordering; use std::collections::hash_map; use std::collections::HashMap; use std::collections::{BTreeSet, VecDeque}; +use std::fmt::Write; use std::pin::Pin; use std::time::{Duration, Instant}; +// Information related to a single transaction that we sent in the past. +// We could just forget it and not save any of this, but keeping this info +// makes it easy to print out human-friendly info later on when we find this +// transaction on chain. 
struct TxSendInfo { sent_at: Instant, source_height: BlockHeight, - target_height: BlockHeight, + source_tx_index: usize, + source_shard_id: ShardId, + source_signer_id: AccountId, + source_receiver_id: AccountId, + target_signer_id: Option, + target_receiver_id: Option, + actions: Vec, + sent_at_target_height: BlockHeight, +} + +impl TxSendInfo { + fn new( + tx: &MappedTx, + source_shard_id: ShardId, + source_height: BlockHeight, + target_height: BlockHeight, + now: Instant, + ) -> Self { + let target_signer_id = if &tx.source_signer_id != &tx.target_tx.transaction.signer_id { + Some(tx.target_tx.transaction.signer_id.clone()) + } else { + None + }; + let target_receiver_id = if &tx.source_receiver_id != &tx.target_tx.transaction.receiver_id + { + Some(tx.target_tx.transaction.receiver_id.clone()) + } else { + None + }; + Self { + source_height, + source_shard_id: source_shard_id, + source_tx_index: tx.source_tx_index, + source_signer_id: tx.source_signer_id.clone(), + source_receiver_id: tx.source_receiver_id.clone(), + target_signer_id, + target_receiver_id, + sent_at: now, + sent_at_target_height: target_height, + actions: tx + .target_tx + .transaction + .actions + .iter() + .map(|a| a.as_ref().to_string()) + .collect::>(), + } + } } #[derive(PartialEq, Eq, Debug)] @@ -103,6 +154,20 @@ impl<'a> Iterator for TxAwaitingNonceIter<'a> { } } +fn gas_pretty(gas: Gas) -> String { + if gas < 1000 { + format!("{} gas", gas) + } else if gas < 1_000_000 { + format!("{} Kgas", gas / 1000) + } else if gas < 1_000_000_000 { + format!("{} Mgas", gas / 1_000_000) + } else if gas < 1_000_000_000_000 { + format!("{} Ggas", gas / 1_000_000_000) + } else { + format!("{} Tgas", gas / 1_000_000_000_000) + } +} + // Keeps the queue of upcoming transactions and provides them in regular intervals via next_batch() // Also keeps track of txs we've sent so far and looks for them on chain, for metrics/logging purposes. 
#[derive(Default)] @@ -297,19 +362,90 @@ impl TxTracker { } } + fn log_target_block(&self, msg: &StreamerMessage) { + // don't do any work here if we're definitely not gonna log it + if tracing::level_filters::LevelFilter::current() + > tracing::level_filters::LevelFilter::DEBUG + { + return; + } + + // right now we're just logging this, but it would be nice to collect/index this + // and have some HTTP debug page where you can see how close the target chain is + // to the source chain + let mut log_message = String::new(); + let now = Instant::now(); + + for s in msg.shards.iter() { + let mut other_txs = 0; + if let Some(c) = &s.chunk { + if c.header.height_included == msg.block.header.height { + write!( + log_message, + "-------- shard {} gas used: {} ---------\n", + s.shard_id, + gas_pretty(c.header.gas_used) + ) + .unwrap(); + for tx in c.transactions.iter() { + if let Some(info) = self.sent_txs.get(&tx.transaction.hash) { + write!( + log_message, + "source #{}{} tx #{} signer: \"{}\"{} receiver: \"{}\"{} actions: <{}> sent {:?} ago @ target #{}\n", + info.source_height, + if s.shard_id == info.source_shard_id { + String::new() + } else { + format!(" (source shard {})", info.source_shard_id) + }, + info.source_tx_index, + info.source_signer_id, + info.target_signer_id.as_ref().map_or(String::new(), |s| format!(" (mapped to \"{}\")", s)), + info.source_receiver_id, + info.target_receiver_id.as_ref().map_or(String::new(), |s| format!(" (mapped to \"{}\")", s)), + info.actions.join(", "), + now - info.sent_at, + info.sent_at_target_height, + ).unwrap(); + } else { + other_txs += 1; + } + } + } else { + write!( + log_message, + "-------- shard {} old chunk (#{}) ---------\n", + s.shard_id, c.header.height_included + ) + .unwrap(); + } + } else { + write!(log_message, "-------- shard {} chunk missing ---------\n", s.shard_id) + .unwrap(); + } + if other_txs > 0 { + write!(log_message, " ... 
\n").unwrap(); + write!( + log_message, + "{} other txs (not ours, or sent before a restart)\n", + other_txs + ) + .unwrap(); + write!(log_message, " ... \n").unwrap(); + } + } + tracing::debug!(target: "mirror", "received target block #{}:\n{}", msg.block.header.height, log_message); + } + pub(crate) fn on_target_block(&mut self, msg: &StreamerMessage) { self.record_block_timestamp(msg); + self.log_target_block(msg); + for s in msg.shards.iter() { if let Some(c) = &s.chunk { for tx in c.transactions.iter() { - if let Some(send_info) = self.sent_txs.remove(&tx.transaction.hash) { - let latency = Instant::now() - send_info.sent_at; - tracing::debug!( - target: "mirror", "found my tx {} from source #{} in target #{} {:?} after sending @ target #{}", - tx.transaction.hash, send_info.source_height, msg.block.header.height, latency, send_info.target_height - ); + if self.sent_txs.remove(&tx.transaction.hash).is_some() { crate::metrics::TRANSACTIONS_INCLUDED.inc(); - self.remove_tx(tx); } } @@ -319,11 +455,13 @@ impl TxTracker { fn on_tx_sent( &mut self, - tx: &SignedTransaction, + tx: &MappedTx, + source_shard_id: ShardId, source_height: BlockHeight, target_height: BlockHeight, + now: Instant, ) { - let hash = tx.get_hash(); + let hash = tx.target_tx.get_hash(); if self.sent_txs.contains_key(&hash) { tracing::warn!(target: "mirror", "transaction sent twice: {}", &hash); return; @@ -332,21 +470,24 @@ impl TxTracker { // TODO: don't keep adding txs if we're not ever finding them on chain, since we'll OOM eventually // if that happens. 
self.sent_txs - .insert(hash, TxSendInfo { sent_at: Instant::now(), source_height, target_height }); + .insert(hash, TxSendInfo::new(tx, source_shard_id, source_height, target_height, now)); let txs = self .txs_by_signer - .entry((tx.transaction.signer_id.clone(), tx.transaction.public_key.clone())) + .entry(( + tx.target_tx.transaction.signer_id.clone(), + tx.target_tx.transaction.public_key.clone(), + )) .or_default(); if let Some(highest_nonce) = txs.iter().next_back() { - if highest_nonce.nonce > tx.transaction.nonce { + if highest_nonce.nonce > tx.target_tx.transaction.nonce { tracing::warn!( target: "mirror", "transaction sent with out of order nonce: {}: {}. Sent so far: {:?}", - &hash, tx.transaction.nonce, txs + &hash, tx.target_tx.transaction.nonce, txs ); } } - if !txs.insert(TxId { hash, nonce: tx.transaction.nonce }) { + if !txs.insert(TxId { hash, nonce: tx.target_tx.transaction.nonce }) { tracing::warn!(target: "mirror", "inserted tx {} twice into txs_by_signer", &hash); } } @@ -405,16 +546,20 @@ impl TxTracker { // We just successfully sent some transactions. Remember them so we can see if they really show up on chain. 
pub(crate) fn on_txs_sent( &mut self, - txs: &[SignedTransaction], + txs: &[(ShardId, Vec)], source_height: BlockHeight, target_height: BlockHeight, ) { + let num_txs: usize = txs.iter().map(|(_, txs)| txs.len()).sum(); tracing::info!( target: "mirror", "Sent {} transactions from source #{} with target HEAD @ #{}", - txs.len(), source_height, target_height + num_txs, source_height, target_height ); - for tx in txs.iter() { - self.on_tx_sent(tx, source_height, target_height); + let now = Instant::now(); + for (shard_id, txs) in txs.iter() { + for tx in txs.iter() { + self.on_tx_sent(tx, *shard_id, source_height, target_height, now); + } } let block_delay = self diff --git a/tools/mirror/src/lib.rs b/tools/mirror/src/lib.rs index b53f7c200a8..079ffe74472 100644 --- a/tools/mirror/src/lib.rs +++ b/tools/mirror/src/lib.rs @@ -147,19 +147,65 @@ fn open_db>(home: P, config: &NearConfig) -> anyhow::Result { // a transaction that's almost prepared, except that we don't yet know // what nonce to use because the public key was added in an AddKey -// action that we haven't seen on chain yet. The tx field is complete +// action that we haven't seen on chain yet. The target_tx field is complete // except for the nonce field. #[derive(Debug)] struct TxAwaitingNonce { source_public: PublicKey, source_signer_id: AccountId, + source_receiver_id: AccountId, + source_tx_index: usize, target_private: SecretKey, - tx: Transaction, + target_tx: Transaction, +} + +impl TxAwaitingNonce { + fn new( + source_tx: &SignedTransactionView, + source_tx_index: usize, + target_tx: Transaction, + target_private: SecretKey, + ) -> Self { + Self { + source_public: source_tx.public_key.clone(), + source_signer_id: source_tx.signer_id.clone(), + source_receiver_id: source_tx.receiver_id.clone(), + source_tx_index, + target_private, + target_tx, + } + } +} + +// A transaction meant for the target chain that is complete/ready to send. 
+// We keep some extra info about the transaction for the purposes of logging +// later on when we find it on chain. +#[derive(Debug)] +struct MappedTx { + source_signer_id: AccountId, + source_receiver_id: AccountId, + source_tx_index: usize, + target_tx: SignedTransaction, +} + +impl MappedTx { + fn new( + source_tx: &SignedTransactionView, + source_tx_index: usize, + target_tx: SignedTransaction, + ) -> Self { + Self { + source_signer_id: source_tx.signer_id.clone(), + source_receiver_id: source_tx.receiver_id.clone(), + source_tx_index, + target_tx, + } + } } #[derive(Debug)] enum TargetChainTx { - Ready(SignedTransaction), + Ready(MappedTx), AwaitingNonce(TxAwaitingNonce), } @@ -169,16 +215,21 @@ impl TargetChainTx { fn set_nonce(&mut self, nonce: Nonce) { match self { Self::AwaitingNonce(t) => { - t.tx.nonce = nonce; - let tx = SignedTransaction::new( - t.target_private.sign(&t.tx.get_hash_and_size().0.as_ref()), - t.tx.clone(), + t.target_tx.nonce = nonce; + let target_tx = SignedTransaction::new( + t.target_private.sign(&t.target_tx.get_hash_and_size().0.as_ref()), + t.target_tx.clone(), ); tracing::debug!( target: "mirror", "prepared a transaction for ({:?}, {:?}) that was previously waiting for the access key to appear on chain", - &tx.transaction.signer_id, &tx.transaction.public_key + &t.source_signer_id, &t.source_public, ); - *self = Self::Ready(tx); + *self = Self::Ready(MappedTx { + source_signer_id: t.source_signer_id.clone(), + source_receiver_id: t.source_receiver_id.clone(), + source_tx_index: t.source_tx_index, + target_tx, + }); } Self::Ready(_) => unreachable!(), } @@ -429,18 +480,19 @@ impl TxMirror { async fn send_transactions( &mut self, - block: &MappedBlock, - ) -> anyhow::Result> { + block: MappedBlock, + ) -> anyhow::Result)>> { let mut sent = vec![]; - for chunk in block.chunks.iter() { - for tx in chunk.txs.iter() { + for chunk in block.chunks { + let mut txs = vec![]; + for tx in chunk.txs { match tx { TargetChainTx::Ready(tx) => { 
match self .target_client .send( NetworkClientMessages::Transaction { - transaction: tx.clone(), + transaction: tx.target_tx.clone(), is_forwarded: false, check_only: false, } @@ -450,7 +502,7 @@ impl TxMirror { { NetworkClientResponses::RequestRouted => { crate::metrics::TRANSACTIONS_SENT.with_label_values(&["ok"]).inc(); - sent.push(tx.clone()); + txs.push(tx); } NetworkClientResponses::InvalidTx(e) => { // TODO: here if we're getting an error because the tx was already included, it is possible @@ -481,6 +533,7 @@ impl TxMirror { } } } + sent.push((chunk.shard_id, txs)); } Ok(sent) } @@ -761,61 +814,71 @@ impl TxMirror { } let mut num_not_ready = 0; - for t in chunk.transactions { - let actions = self.map_actions(&t, &prev_hash).await?; + for (idx, source_tx) in chunk.transactions.into_iter().enumerate() { + let actions = self.map_actions(&source_tx, &prev_hash).await?; if actions.is_empty() { // If this is a tx containing only stake actions, skip it. continue; } - let mapped_key = crate::key_mapping::map_key(&t.public_key, self.secret.as_ref()); + let mapped_key = + crate::key_mapping::map_key(&source_tx.public_key, self.secret.as_ref()); let public_key = mapped_key.public_key(); let target_signer_id = - crate::key_mapping::map_account(&t.signer_id, self.secret.as_ref()); + crate::key_mapping::map_account(&source_tx.signer_id, self.secret.as_ref()); match self - .map_nonce(&t.signer_id, &target_signer_id, &t.public_key, &public_key, t.nonce) + .map_nonce( + &source_tx.signer_id, + &target_signer_id, + &source_tx.public_key, + &public_key, + source_tx.nonce, + ) .await? 
{ Ok(nonce) => { - let mut tx = Transaction::new( + let mut target_tx = Transaction::new( target_signer_id, public_key, - crate::key_mapping::map_account(&t.receiver_id, self.secret.as_ref()), + crate::key_mapping::map_account( + &source_tx.receiver_id, + self.secret.as_ref(), + ), nonce, ref_hash.clone(), ); - tx.actions = actions; - let tx = SignedTransaction::new( - mapped_key.sign(&tx.get_hash_and_size().0.as_ref()), - tx, + target_tx.actions = actions; + let target_tx = SignedTransaction::new( + mapped_key.sign(&target_tx.get_hash_and_size().0.as_ref()), + target_tx, ); - txs.push(TargetChainTx::Ready(tx)); + txs.push(TargetChainTx::Ready(MappedTx::new(&source_tx, idx, target_tx))); } Err(e) => match e { MapNonceError::AddOverflow(..) | MapNonceError::SubOverflow(..) | MapNonceError::SourceKeyNotOnChain => { - tracing::error!(target: "mirror", "error mapping nonce for ({:?}, {:?}): {:?}", &t.signer_id, &public_key, e); + tracing::error!(target: "mirror", "error mapping nonce for ({:?}, {:?}): {:?}", &source_tx.signer_id, &public_key, e); continue; } MapNonceError::TargetKeyNotOnChain => { - let mut tx = Transaction::new( - crate::key_mapping::map_account(&t.signer_id, self.secret.as_ref()), + let mut target_tx = Transaction::new( + crate::key_mapping::map_account( + &source_tx.signer_id, + self.secret.as_ref(), + ), public_key, crate::key_mapping::map_account( - &t.receiver_id, + &source_tx.receiver_id, self.secret.as_ref(), ), - t.nonce, + source_tx.nonce, ref_hash.clone(), ); - tx.actions = actions; - txs.push(TargetChainTx::AwaitingNonce(TxAwaitingNonce { - tx, - source_public: t.public_key.clone(), - source_signer_id: t.signer_id.clone(), - target_private: mapped_key, - })); + target_tx.actions = actions; + txs.push(TargetChainTx::AwaitingNonce(TxAwaitingNonce::new( + &source_tx, idx, target_tx, mapped_key, + ))); num_not_ready += 1; } }, @@ -931,10 +994,10 @@ impl TxMirror { let nonce = self .map_nonce( &tx.source_signer_id, - &tx.tx.signer_id, + 
&tx.target_tx.signer_id, &tx.source_public, - &tx.tx.public_key, - tx.tx.nonce, + &tx.target_tx.public_key, + tx.target_tx.nonce, ) .await? .unwrap(); @@ -962,13 +1025,14 @@ impl TxMirror { // time to send a batch of transactions mapped_block = tracker.next_batch(), if tracker.num_blocks_queued() > 0 => { let mapped_block = mapped_block.unwrap(); - let sent = self.send_transactions(&mapped_block).await?; - tracker.on_txs_sent(&sent, mapped_block.source_height, target_height); + let source_height = mapped_block.source_height; + let sent = self.send_transactions(mapped_block).await?; + tracker.on_txs_sent(&sent, source_height, target_height); // now we have one second left until we need to send more transactions. In the // meantime, we might as well prepare some more batches of transactions. // TODO: continue in best effort fashion on error - self.set_next_source_height(mapped_block.source_height+1)?; + self.set_next_source_height(source_height+1)?; self.queue_txs(&mut tracker, target_head, true).await?; } msg = self.target_stream.recv() => { From 9683b6fa8afaf330fa24f427f478842e4de97b47 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Mon, 24 Oct 2022 21:31:57 +0100 Subject: [PATCH 016/103] chain: remove TxStatusError::InvalidTx variant (#7915) The TxStatusError::InvalidTx variant is never constructed so get rid of it. 
--- chain/client-primitives/src/types.rs | 2 -- chain/jsonrpc/src/api/transactions.rs | 1 - chain/rosetta-rpc/src/errors.rs | 4 ---- 3 files changed, 7 deletions(-) diff --git a/chain/client-primitives/src/types.rs b/chain/client-primitives/src/types.rs index 785262a87bd..ad2de642944 100644 --- a/chain/client-primitives/src/types.rs +++ b/chain/client-primitives/src/types.rs @@ -8,7 +8,6 @@ use chrono::DateTime; use near_primitives::time::Utc; use near_chain_configs::ProtocolConfigView; -use near_primitives::errors::InvalidTxError; use near_primitives::hash::CryptoHash; use near_primitives::merkle::{MerklePath, PartialMerkleTree}; use near_primitives::network::PeerId; @@ -595,7 +594,6 @@ pub struct TxStatus { pub enum TxStatusError { ChainError(near_chain_primitives::Error), MissingTransaction(CryptoHash), - InvalidTx(InvalidTxError), InternalError(String), TimeoutError, } diff --git a/chain/jsonrpc/src/api/transactions.rs b/chain/jsonrpc/src/api/transactions.rs index b34825a99e8..06b6ebaa79d 100644 --- a/chain/jsonrpc/src/api/transactions.rs +++ b/chain/jsonrpc/src/api/transactions.rs @@ -47,7 +47,6 @@ impl RpcFrom for RpcTransactionError { TxStatusError::MissingTransaction(requested_transaction_hash) => { Self::UnknownTransaction { requested_transaction_hash } } - TxStatusError::InvalidTx(context) => Self::InvalidTransaction { context }, TxStatusError::InternalError(debug_info) => Self::InternalError { debug_info }, TxStatusError::TimeoutError => Self::TimeoutError, } diff --git a/chain/rosetta-rpc/src/errors.rs b/chain/rosetta-rpc/src/errors.rs index 52bcb7f9e67..c61ea89b2de 100644 --- a/chain/rosetta-rpc/src/errors.rs +++ b/chain/rosetta-rpc/src/errors.rs @@ -35,10 +35,6 @@ impl From for ErrorKind { near_client::TxStatusError::MissingTransaction(err) => { Self::NotFound(format!("Transaction is missing: {:?}", err)) } - near_client::TxStatusError::InvalidTx(err) => Self::NotFound(format!( - "Transaction is invalid, so it will never be included to the chain: 
{:?}", - err - )), near_client::TxStatusError::InternalError(_) | near_client::TxStatusError::TimeoutError => { // TODO: remove the statuses from TxStatusError since they are From cb50b86c9dace4971f44aae19bfd140c7efdb8e5 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Mon, 24 Oct 2022 23:11:18 +0100 Subject: [PATCH 017/103] =?UTF-8?q?core:=20add=20chain=20Error=20=E2=86=92?= =?UTF-8?q?=20TxStatusError=20conversion=20(#7912)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Having conversion from near_chain_primitves::Error to TxStatusError eliminates a handful of trivial map_error calls. --- chain/client-primitives/src/types.rs | 6 ++++++ chain/client/src/view_client.rs | 17 ++++++----------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/chain/client-primitives/src/types.rs b/chain/client-primitives/src/types.rs index ad2de642944..fedd8af30cd 100644 --- a/chain/client-primitives/src/types.rs +++ b/chain/client-primitives/src/types.rs @@ -598,6 +598,12 @@ pub enum TxStatusError { TimeoutError, } +impl From for TxStatusError { + fn from(error: near_chain_primitives::Error) -> Self { + Self::ChainError(error) + } +} + impl Message for TxStatus { type Result = Result, TxStatusError>; } diff --git a/chain/client/src/view_client.rs b/chain/client/src/view_client.rs index 4557883ac9a..6458a211e76 100644 --- a/chain/client/src/view_client.rs +++ b/chain/client/src/view_client.rs @@ -371,7 +371,7 @@ impl ViewClientActor { } } - let head = self.chain.head().map_err(|e| TxStatusError::ChainError(e))?; + let head = self.chain.head()?; let target_shard_id = self .runtime_adapter .account_id_to_shard_id(&signer_account_id, &head.epoch_id) @@ -386,10 +386,8 @@ impl ViewClientActor { match self.chain.get_final_transaction_result(&tx_hash) { Ok(tx_result) => { let res = if fetch_receipt { - let final_result = self - .chain - .get_final_transaction_result_with_receipt(tx_result) - .map_err(|e| 
TxStatusError::ChainError(e))?; + let final_result = + self.chain.get_final_transaction_result_with_receipt(tx_result)?; FinalExecutionOutcomeViewEnum::FinalExecutionOutcomeWithReceipt( final_result, ) @@ -407,7 +405,7 @@ impl ViewClientActor { } Err(err) => { warn!(target: "client", ?err, "Error trying to get transaction result"); - Err(TxStatusError::ChainError(err)) + Err(err.into()) } } } else { @@ -417,10 +415,7 @@ impl ViewClientActor { .runtime_adapter .account_id_to_shard_id(&signer_account_id, &head.epoch_id) .map_err(|err| TxStatusError::InternalError(err.to_string()))?; - let validator = self - .chain - .find_validator_for_forwarding(target_shard_id) - .map_err(|e| TxStatusError::ChainError(e))?; + let validator = self.chain.find_validator_for_forwarding(target_shard_id)?; self.network_adapter.do_send( PeerManagerMessageRequest::NetworkRequests(NetworkRequests::TxStatus( @@ -907,7 +902,7 @@ impl Handler> for ViewClientActor { } Err(e) => match e { near_chain::Error::DBNotFoundErr(_) => { - let head = self.chain.head().map_err(|e| TxStatusError::ChainError(e))?; + let head = self.chain.head()?; let target_shard_id = self.runtime_adapter.account_id_to_shard_id(&account_id, &head.epoch_id)?; if self.runtime_adapter.cares_about_shard( From e2856cfaa33f29c02502dcb2e95c3db77800567b Mon Sep 17 00:00:00 2001 From: Jakob Meier Date: Tue, 25 Oct 2022 07:23:00 +0100 Subject: [PATCH 018/103] doc: gas cost parameter chapter (#7918) --- docs/SUMMARY.md | 4 +++ docs/architecture/gas_params/README.md | 31 +++++++++++++++++++ docs/architecture/gas_params/estimator.md | 28 +++++++++++++++++ docs/architecture/gas_params/gas_profile.md | 25 +++++++++++++++ .../gas_params/parameter_definition.md | 13 ++++++++ 5 files changed, 101 insertions(+) create mode 100644 docs/architecture/gas_params/README.md create mode 100644 docs/architecture/gas_params/estimator.md create mode 100644 docs/architecture/gas_params/gas_profile.md create mode 100644 
docs/architecture/gas_params/parameter_definition.md diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 795b6c26a43..bcdf59435bf 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -12,6 +12,10 @@ - [Transaction Routing](./architecture/how/tx_routing.md) - [Trie](./architecture/trie.md) - [Network](./architecture/network.md) +- [Gas Cost Parameters](./architecture/gas_params/README.md) + - [Parameter Definitions](./parameter_definition.md) + - [Gas Profile](./gas_profile.md) + - [Runtime Parameter Estimator](./estimator.md) # Practices diff --git a/docs/architecture/gas_params/README.md b/docs/architecture/gas_params/README.md new file mode 100644 index 00000000000..2085e72cdfa --- /dev/null +++ b/docs/architecture/gas_params/README.md @@ -0,0 +1,31 @@ +# Gas Cost Parameters + +NEAR charges gas when executing users' WASM code. The how and why is described +in other documents, such as [Gas basic +concepts](https://docs.near.org/concepts/basics/transactions/gas), [Gas advanced +concepts](https://docs.near.org/concepts/basics/transactions/gas-advanced), and +[the runtime fee specification](https://nomicon.io/RuntimeSpec/Fees/). + +So-called gas cost parameters are part of the protocol definition which can +change between versions. The section on [Parameter Definitions](./parameter_definition.md) +explains how to find the source of truth for such parameter's values in the +nearcore repository and how they can be referenced in code. + +The [Gas Profile](./gas_profile.md) section goes into more details on how gas +costs of a transaction are tracked in nearcore. + +The [runtime parameter estimator](./estimator.md) is a separate binary within +the nearcore repository. It contains benchmarking-like code that is used to +validate existing parameters values. Or when new features are added, new code +has to be added there to estimate the safe values of new parameters. 
That +section is for you if you want to add new features, such as a new pre-compiled +method or other host functions. + + + + + + + + + \ No newline at end of file diff --git a/docs/architecture/gas_params/estimator.md b/docs/architecture/gas_params/estimator.md new file mode 100644 index 00000000000..d3dd853e193 --- /dev/null +++ b/docs/architecture/gas_params/estimator.md @@ -0,0 +1,28 @@ +# Runtime Parameter Estimator + +The runtime parameter estimator is a byzantine benchmarking suite. Byzantine +benchmarking is not really a commonly used term but I feel it describes it quite +well. It measures the performance assuming that up to a third of validators and +all users collude to make the system as slow as possible. + +This benchmarking suite is used to check that the gas parameters defined in the +protocol are correct. Correct in this context means that a chunk filled with 1 Pgas +will only take 1 second to be applied. Or more generally, per 1 Tgas of +execution, we spend no more than 1ms wall-clock time. + +For now, nearcore timing is the only one that matters. Things will become more +complicated once there are multiple client implementations. But knowing that +nearcore can serve requests fast enough proves that it is possible to be at +least as fast. However, we should be careful to not couple costs too tightly +with the specific implementation of nearcore to allow for innovation in new +clients. + +The estimator code is part of the nearcore repository in the directory +[runtime/runtime-params-estimator](https://github.com/near/nearcore/tree/master/runtime/runtime-params-estimator). + + + + + + + \ No newline at end of file diff --git a/docs/architecture/gas_params/gas_profile.md b/docs/architecture/gas_params/gas_profile.md new file mode 100644 index 00000000000..d0fdebedd1b --- /dev/null +++ b/docs/architecture/gas_params/gas_profile.md @@ -0,0 +1,25 @@ +# Gas Profile + +The transaction runtime charges gas in various places around the code. 
But they +all end up summarized inside an `ActionResult`. More specifically, in the counters +`gas_burnt` and `gas_used` and the `profile` field that keeps track of what the +gas has been spent on. + +## Charging Gas +Generally speaking, gas is charged right before the computation that it pays for +is executed. It has to be before to avoid cheap resource exhaustion attacks. +Imagine the user has only 1 gas unit left but we start executing an expensive +step, we would waste a significant duration of compute on all validators without +anyone paying for it. + +When charging gas for an action, the `ActionResult` can be updated directly. But +when charging WASM costs, it would be too slow to do a context switch each time. +Therefore, a fast gas counter exists that can be updated from within the VM. +(See +[gas_counter.rs](https://github.com/near/nearcore/blob/master/runtime/near-vm-logic/src/gas_counter.rs)) +At the end of a function call execution, the gas counter is read by the host and +merged into the `ActionResult`. + + + + diff --git a/docs/architecture/gas_params/parameter_definition.md b/docs/architecture/gas_params/parameter_definition.md new file mode 100644 index 00000000000..4ec8159346b --- /dev/null +++ b/docs/architecture/gas_params/parameter_definition.md @@ -0,0 +1,13 @@ +# Parameter Definitions + +Gas parameters are a subset of runtime parameters that are defined in +[core/primitives/res/runtime_configs/parameters.txt](https://github.com/near/nearcore/blob/master/core/primitives/res/runtime_configs/parameters.txt). +IMPORTANT: This is not the final list of parameters; it contains the base values +which can be overwritten per protocol version. For example, +[53.txt](https://github.com/near/nearcore/blob/master/core/primitives/res/runtime_configs/53.txt) changes several parameters +starting from version 53. 
To see all parameter values for a specific version at +once, check out list of JSON snapshots generated in this directory: +[core/primitives/src/runtime/snapshots](https://github.com/near/nearcore/blob/master/core/primitives/src/runtime/snapshots). + + + From b8fefd31348e105e4607bac4ea44f7a9c34e5465 Mon Sep 17 00:00:00 2001 From: nikurt <86772482+nikurt@users.noreply.github.com> Date: Tue, 25 Oct 2022 10:31:11 +0200 Subject: [PATCH 019/103] Changelog: include o11y changes (#7889) cc: @posvyatokum --- CHANGELOG.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f5a1de60876..745d1ef9c85 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,6 +52,16 @@ a hard limit but instead sets a memory consumption limit. For large trie nodes, the limits are close to equivalent. For small values, there can now fit more in the cache than previously. +* Tracing of work across actix workers within a process: + [#7866](https://github.com/near/nearcore/pull/7866), + [#7819](https://github.com/near/nearcore/pull/7819), + [#7773](https://github.com/near/nearcore/pull/7773). +* Scope of collected tracing information can be configured at run-time: + [#7701](https://github.com/near/nearcore/pull/7701). +* Attach node's `chain_id`, `node_id`, and `account_id` values to tracing + information: [#7711](https://github.com/near/nearcore/pull/7711). +* Change exporter of tracing information from `opentelemetry-jaeger` to + `opentelemetry-otlp`: [#7563](https://github.com/near/nearcore/pull/7563). ## 1.29.0 [2022-08-15] From 01d2fde184db6f15cb07e9ebb3fc100662e41196 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Tue, 25 Oct 2022 12:35:22 +0100 Subject: [PATCH 020/103] store: introduce cold storage (#7871) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a `cold_store` Cargo feature which enables the option to configure the node with cold storage. 
At the moment, all this does is open the cold database and doesn’t enable any other features. The idea is that this can now allow experimenting with code that needs access to the cold storage. --- chain/indexer/src/streamer/mod.rs | 7 +- core/store/Cargo.toml | 1 + core/store/benches/store_bench.rs | 13 +- core/store/src/db.rs | 3 + core/store/src/db/colddb.rs | 20 +- core/store/src/db/rocksdb/snapshot.rs | 2 +- core/store/src/lib.rs | 72 +++++- core/store/src/metadata.rs | 37 ++- core/store/src/opener.rs | 234 +++++++++++++++--- genesis-tools/genesis-populate/src/main.rs | 2 +- .../genesis-populate/src/state_dump.rs | 4 +- nearcore/Cargo.toml | 2 + nearcore/benches/store.rs | 2 +- nearcore/src/config.rs | 8 +- nearcore/src/lib.rs | 34 ++- neard/Cargo.toml | 1 + runtime/runtime-params-estimator/src/main.rs | 9 +- test-utils/store-validator/src/main.rs | 2 +- tools/mirror/src/lib.rs | 5 +- tools/mock-node/src/setup.rs | 2 +- tools/state-viewer/src/cli.rs | 3 +- 21 files changed, 381 insertions(+), 82 deletions(-) diff --git a/chain/indexer/src/streamer/mod.rs b/chain/indexer/src/streamer/mod.rs index 6f6f0986367..4af7dbe7bbd 100644 --- a/chain/indexer/src/streamer/mod.rs +++ b/chain/indexer/src/streamer/mod.rs @@ -290,9 +290,10 @@ pub(crate) async fn start( blocks_sink: mpsc::Sender, ) { info!(target: INDEXER, "Starting Streamer..."); - let indexer_db_path = near_store::NodeStorage::opener(&indexer_config.home_dir, &store_config) - .path() - .join("indexer"); + let indexer_db_path = + near_store::NodeStorage::opener(&indexer_config.home_dir, &store_config, None) + .path() + .join("indexer"); // TODO: implement proper error handling let db = DB::open_default(indexer_db_path).unwrap(); diff --git a/core/store/Cargo.toml b/core/store/Cargo.toml index d56f6cca717..2ac82d946db 100644 --- a/core/store/Cargo.toml +++ b/core/store/Cargo.toml @@ -57,6 +57,7 @@ no_cache = [] single_thread_rocksdb = [] # Deactivate RocksDB IO background threads test_features = [] 
protocol_feature_flat_state = [] +cold_store = [] nightly_protocol = [] nightly = [ diff --git a/core/store/benches/store_bench.rs b/core/store/benches/store_bench.rs index 4e5bc2284c6..0368b82c5e5 100644 --- a/core/store/benches/store_bench.rs +++ b/core/store/benches/store_bench.rs @@ -19,10 +19,15 @@ fn benchmark_write_then_read_successful( let tmp_dir = tempfile::tempdir().unwrap(); // Use default StoreConfig rather than NodeStorage::test_opener so we’re using the // same configuration as in production. - let store = NodeStorage::opener(tmp_dir.path(), &Default::default()) - .open() - .unwrap() - .get_store(Temperature::Hot); + let store = NodeStorage::opener( + tmp_dir.path(), + &Default::default(), + #[cfg(feature = "cold_store")] + None, + ) + .open() + .unwrap() + .get_store(Temperature::Hot); let keys = generate_keys(num_keys, key_size); write_to_db(&store, &keys, max_value_size, col); diff --git a/core/store/src/db.rs b/core/store/src/db.rs index eb91d0db29d..d7edbdf832a 100644 --- a/core/store/src/db.rs +++ b/core/store/src/db.rs @@ -2,12 +2,15 @@ use std::io; use crate::DBCol; +#[cfg(feature = "cold_store")] mod colddb; pub mod refcount; pub(crate) mod rocksdb; mod slice; mod testdb; +#[cfg(feature = "cold_store")] +pub use self::colddb::ColdDB; pub use self::rocksdb::RocksDB; pub use self::slice::DBSlice; pub use self::testdb::TestDB; diff --git a/core/store/src/db/colddb.rs b/core/store/src/db/colddb.rs index 6ba0f25e5cf..9ac0ff8842a 100644 --- a/core/store/src/db/colddb.rs +++ b/core/store/src/db/colddb.rs @@ -35,9 +35,15 @@ use crate::DBCol; /// Lastly, since no data is ever deleted from cold storage, trying to decrease /// reference of a value count or delete data is ignored and if debug assertions /// are enabled will cause a panic. 
-struct ColdDatabase(D); +pub struct ColdDB(D); -impl ColdDatabase { +impl std::convert::From for ColdDB { + fn from(db: D) -> Self { + Self(db) + } +} + +impl ColdDB { /// Returns raw bytes from the underlying storage. /// /// Adjusts the key if necessary (see [`get_cold_key`]) and retrieves data @@ -51,7 +57,7 @@ impl ColdDatabase { } } -impl super::Database for ColdDatabase { +impl super::Database for ColdDB { fn get_raw_bytes(&self, col: DBCol, key: &[u8]) -> std::io::Result>> { match self.get_impl(col, key) { Ok(Some(value)) if col.is_rc() => { @@ -262,8 +268,8 @@ mod test { const VALUE: &[u8] = "FooBar".as_bytes(); /// Constructs test in-memory database. - fn create_test_db() -> ColdDatabase { - ColdDatabase(crate::db::testdb::TestDB::default()) + fn create_test_db() -> ColdDB { + ColdDB(crate::db::testdb::TestDB::default()) } fn set(col: DBCol, key: &[u8]) -> DBOp { @@ -338,8 +344,8 @@ mod test { let name = if is_raw { "raw " } else { "cold" }; let value = db.get_raw_bytes(col, &key).unwrap(); - // When fetching reference counted column ColdDatabase adds - // reference count to it. + // When fetching reference counted column ColdDB adds reference + // count to it. 
let value = pretty_value(value.as_deref(), col.is_rc() && !is_raw); result.push(format!(" [{name}] get_raw_bytes → {value}")); diff --git a/core/store/src/db/rocksdb/snapshot.rs b/core/store/src/db/rocksdb/snapshot.rs index 88d2b32d230..6a2d255cb02 100644 --- a/core/store/src/db/rocksdb/snapshot.rs +++ b/core/store/src/db/rocksdb/snapshot.rs @@ -190,7 +190,7 @@ fn test_snapshot_recovery() { { let mut config = opener.config().clone(); config.path = Some(path); - let opener = crate::NodeStorage::opener(tmpdir.path(), &config); + let opener = crate::NodeStorage::opener(tmpdir.path(), &config, None); let store = opener.open().unwrap().get_store(crate::Temperature::Hot); assert_eq!(Some(&b"value"[..]), store.get(COL, KEY).unwrap().as_deref()); } diff --git a/core/store/src/lib.rs b/core/store/src/lib.rs index 30f1bc2a7f9..ebf242c723f 100644 --- a/core/store/src/lib.rs +++ b/core/store/src/lib.rs @@ -57,8 +57,11 @@ pub use crate::opener::{StoreMigrator, StoreOpener, StoreOpenerError}; /// In the future, certain parts of the code may need to access hot or cold /// storage. Specifically, querying an old block will require reading it from /// the cold storage. +#[derive(Clone, Copy)] pub enum Temperature { Hot, + #[cfg(feature = "cold_store")] + Cold, } /// Node’s storage holding chain and all other necessary data. @@ -68,7 +71,11 @@ pub enum Temperature { /// to [`Store`] which will abstract access to only one of the temperatures of /// the storage. pub struct NodeStorage { - storage: Arc, + hot_storage: Arc, + #[cfg(feature = "cold_store")] + cold_storage: Option>, + #[cfg(not(feature = "cold_store"))] + cold_storage: Option, } /// Node’s single storage source. @@ -84,10 +91,22 @@ pub struct Store { storage: Arc, } +// Those are temporary. While cold_store feature is stabilised, remove those +// type aliases and just use the type directly. 
+#[cfg(feature = "cold_store")] +pub type ColdConfig<'a> = Option<&'a StoreConfig>; +#[cfg(not(feature = "cold_store"))] +pub type ColdConfig<'a> = Option; + impl NodeStorage { - /// Initialises a new opener with given home directory and store config. - pub fn opener<'a>(home_dir: &std::path::Path, config: &'a StoreConfig) -> StoreOpener<'a> { - StoreOpener::new(home_dir, config) + /// Initialises a new opener with given home directory and hot and cold + /// store config. + pub fn opener<'a>( + home_dir: &std::path::Path, + config: &'a StoreConfig, + cold_config: ColdConfig<'a>, + ) -> StoreOpener<'a> { + StoreOpener::new(home_dir, config, cold_config) } /// Initialises an opener for a new temporary test store. @@ -101,7 +120,7 @@ impl NodeStorage { pub fn test_opener() -> (tempfile::TempDir, StoreOpener<'static>) { static CONFIG: Lazy = Lazy::new(StoreConfig::test_config); let dir = tempfile::tempdir().unwrap(); - let opener = StoreOpener::new(dir.path(), &CONFIG); + let opener = StoreOpener::new(dir.path(), &CONFIG, None); (dir, opener) } @@ -115,7 +134,22 @@ impl NodeStorage { /// possibly [`crate::test_utils::create_test_store`] (depending whether you /// need [`NodeStorage`] or [`Store`] object. pub fn new(storage: Arc) -> Self { - Self { storage } + Self { hot_storage: storage, cold_storage: None } + } + + /// Constructs new object backed by given database. + fn from_rocksdb( + hot_storage: crate::db::RocksDB, + #[cfg(feature = "cold_store")] cold_storage: Option, + #[cfg(not(feature = "cold_store"))] cold_storage: Option, + ) -> Self { + Self { + hot_storage: Arc::new(hot_storage), + #[cfg(feature = "cold_store")] + cold_storage: cold_storage.map(|db| Arc::new(db.into())), + #[cfg(not(feature = "cold_store"))] + cold_storage: cold_storage.map(|_| unreachable!()), + } } /// Returns storage for given temperature. @@ -132,7 +166,9 @@ impl NodeStorage { /// cold. 
pub fn get_store(&self, temp: Temperature) -> Store { match temp { - Temperature::Hot => Store { storage: self.storage.clone() }, + Temperature::Hot => Store { storage: self.hot_storage.clone() }, + #[cfg(feature = "cold_store")] + Temperature::Cold => Store { storage: self.cold_storage.as_ref().unwrap().clone() }, } } @@ -151,9 +187,11 @@ impl NodeStorage { /// well. For example, garbage collection only ever touches hot storage but /// it should go through [`Store`] interface since data it manipulates /// (e.g. blocks) are live in both databases. - pub fn get_inner(&self, temp: Temperature) -> &Arc { + pub fn _get_inner(&self, temp: Temperature) -> &Arc { match temp { - Temperature::Hot => &self.storage, + Temperature::Hot => &self.hot_storage, + #[cfg(feature = "cold_store")] + Temperature::Cold => todo!(), } } @@ -163,15 +201,27 @@ impl NodeStorage { /// `Arc::clone`. pub fn into_inner(self, temp: Temperature) -> Arc { match temp { - Temperature::Hot => self.storage, + Temperature::Hot => self.hot_storage, + #[cfg(feature = "cold_store")] + Temperature::Cold => self.cold_storage.unwrap(), } } + /// Returns whether the storage has a cold database. + pub fn has_cold(&self) -> bool { + self.cold_storage.is_some() + } + /// Reads database metadata and returns whether the storage is archival. 
pub fn is_archive(&self) -> io::Result { - Ok(match metadata::DbMetadata::read(self.storage.as_ref())?.kind.unwrap() { + if self.cold_storage.is_some() { + return Ok(true); + } + Ok(match metadata::DbMetadata::read(self.hot_storage.as_ref())?.kind.unwrap() { metadata::DbKind::RPC => false, metadata::DbKind::Archive => true, + #[cfg(feature = "cold_store")] + metadata::DbKind::Hot | metadata::DbKind::Cold => unreachable!(), }) } } diff --git a/core/store/src/metadata.rs b/core/store/src/metadata.rs index debb406e15b..6e7f9e53223 100644 --- a/core/store/src/metadata.rs +++ b/core/store/src/metadata.rs @@ -35,28 +35,53 @@ pub enum DbKind { /// The database is an archive database meaning that it is not garbage /// collected and stores all chain data. Archive, + #[cfg(feature = "cold_store")] + /// The database is Hot meaning that the node runs in archival mode with + /// a paired Cold database. + Hot, + #[cfg(feature = "cold_store")] + /// The database is Cold meaning that the node runs in archival mode with + /// a paired Hot database. 
+ Cold, } pub(super) fn set_store_version(storage: &NodeStorage, version: DbVersion) -> std::io::Result<()> { set_store_metadata(storage, DbMetadata { version, kind: None }) } -pub(super) fn set_store_metadata( +fn set_db_metadata( storage: &NodeStorage, + temp: Temperature, metadata: DbMetadata, ) -> std::io::Result<()> { - let version = metadata.version.to_string().into_bytes(); - let kind = metadata.kind.map(|kind| <&str>::from(kind).as_bytes()); - let mut store_update = storage.get_store(Temperature::Hot).store_update(); - store_update.set(DBCol::DbVersion, VERSION_KEY, &version); + let mut store_update = storage.get_store(temp).store_update(); + store_update.set(DBCol::DbVersion, VERSION_KEY, metadata.version.to_string().as_bytes()); if metadata.version >= DB_VERSION_WITH_KIND { - if let Some(kind) = kind { + #[allow(unused_mut)] + let mut kind = metadata.kind; + #[cfg(feature = "cold_store")] + if matches!(temp, Temperature::Cold) || storage.has_cold() { + kind = Some(if matches!(temp, Temperature::Hot) { DbKind::Hot } else { DbKind::Cold }); + } + if let Some(kind) = kind.map(|kind| <&str>::from(kind).as_bytes()) { store_update.set(DBCol::DbVersion, KIND_KEY, kind); } } store_update.commit() } +pub(super) fn set_store_metadata( + storage: &NodeStorage, + metadata: DbMetadata, +) -> std::io::Result<()> { + set_db_metadata(storage, Temperature::Hot, metadata)?; + #[cfg(feature = "cold_store")] + if storage.has_cold() { + set_db_metadata(storage, Temperature::Cold, metadata)?; + } + Ok(()) +} + /// Metadata about a database. 
#[derive(Clone, Copy)] pub(super) struct DbMetadata { diff --git a/core/store/src/opener.rs b/core/store/src/opener.rs index 458d59c0d5c..5b81bcb7486 100644 --- a/core/store/src/opener.rs +++ b/core/store/src/opener.rs @@ -5,8 +5,6 @@ use crate::metadata::{ }; use crate::{Mode, NodeStorage, StoreConfig}; -const STORE_PATH: &str = "data"; - #[derive(Debug, thiserror::Error)] pub enum StoreOpenerError { /// I/O or RocksDB-level error while opening or accessing the database. @@ -25,6 +23,25 @@ pub enum StoreOpenerError { #[error("Database already exists")] DbAlreadyExists, + /// Hot database exists but cold doesn’t or the other way around. + #[error("Hot and cold databases must either both exist or not")] + HotColdExistenceMismatch, + + /// Hot and cold databases have different versions. + #[error( + "Hot database version ({hot_version}) doesn’t match \ + cold databases version ({cold_version})" + )] + HotColdVersionMismatch { hot_version: DbVersion, cold_version: DbVersion }, + + /// Database has incorrect kind. + /// + /// Specifically, this happens if node is running with a single database and + /// its kind is not RPC or Archive; or it’s running with two databases and + /// their types aren’t Hot and Cold respectively. + #[error("{which} database kind should be {want} but got {got:?}")] + DbKindMismatch { which: &'static str, got: Option, want: DbKind }, + /// Unable to create a migration snapshot because one already exists. #[error( "Migration snapshot already exists at {0}; \ @@ -110,10 +127,11 @@ impl From for StoreOpenerError { /// .open(); /// ``` pub struct StoreOpener<'a> { - /// Opener for a single RocksDB instance. - /// - /// pub(crate) for testing. - db: DBOpener<'a>, + /// Opener for an instance of RPC or Hot RocksDB store. + hot: DBOpener<'a>, + + /// Opener for an instance of Cold RocksDB store if one was configured. + cold: Option>, /// What kind of database we should expect; if `None`, the kind of the /// database is not checked. 
@@ -138,8 +156,17 @@ struct DBOpener<'a> { impl<'a> StoreOpener<'a> { /// Initialises a new opener with given home directory and store config. - pub(crate) fn new(home_dir: &std::path::Path, config: &'a StoreConfig) -> Self { - Self { db: DBOpener::new(home_dir, config), expected_kind: None, migrator: None } + pub(crate) fn new( + home_dir: &std::path::Path, + config: &'a StoreConfig, + cold_config: super::ColdConfig<'a>, + ) -> Self { + Self { + hot: DBOpener::new(home_dir, config, "data"), + cold: cold_config.map(|config| ColdDBOpener::new(home_dir, config, "cold-data")), + expected_kind: None, + migrator: None, + } } /// Configures whether archive or RPC storage is expected. @@ -167,12 +194,12 @@ impl<'a> StoreOpener<'a> { /// /// Does not check whether the database actually exists. pub fn path(&self) -> &std::path::Path { - &self.db.path + &self.hot.path } #[cfg(test)] pub(crate) fn config(&self) -> &StoreConfig { - self.db.config + self.hot.config } /// Opens the storage in read-write mode. @@ -190,10 +217,53 @@ impl<'a> StoreOpener<'a> { /// other hand, if mode is [`Mode::Create`], fails if the database already /// exists. pub fn open_in_mode(&self, mode: Mode) -> Result { - if let Some(metadata) = self.db.get_metadata()? { + let hot_meta = self.hot.get_metadata()?; + let cold_meta = self.cold.as_ref().map(|db| db.get_metadata()).transpose()?; + + if let Some(hot_meta) = hot_meta { + if let Some(Some(cold_meta)) = cold_meta { + assert!(cfg!(feature = "cold_store")); + // If cold database exists, hot and cold databases must have the + // same version and to be Hot and Cold kinds respectively. 
+ if hot_meta.version != cold_meta.version { + return Err(StoreOpenerError::HotColdVersionMismatch { + hot_version: hot_meta.version, + cold_version: cold_meta.version, + }); + } + #[cfg(feature = "cold_store")] + if hot_meta.kind != Some(DbKind::Hot) { + return Err(StoreOpenerError::DbKindMismatch { + which: "Hot", + got: hot_meta.kind, + want: DbKind::Hot, + }); + } + #[cfg(feature = "cold_store")] + if cold_meta.kind != Some(DbKind::Cold) { + return Err(StoreOpenerError::DbKindMismatch { + which: "Cold", + got: cold_meta.kind, + want: DbKind::Cold, + }); + } + } else if cold_meta.is_some() { + // If cold database is configured and hot database exists, + // cold database must exist as well. + assert!(cfg!(feature = "cold_store")); + return Err(StoreOpenerError::HotColdExistenceMismatch); + } else if !matches!(hot_meta.kind, None | Some(DbKind::RPC | DbKind::Archive)) { + // If cold database is not configured, hot database must be + // RPC or Archive kind. + return Err(StoreOpenerError::DbKindMismatch { + which: "Hot", + got: hot_meta.kind, + want: self.expected_kind.unwrap_or(DbKind::RPC), + }); + } self.open_existing( mode.but_cannot_create().ok_or(StoreOpenerError::DbAlreadyExists)?, - metadata, + hot_meta, ) } else if mode.can_create() { self.open_new() @@ -207,28 +277,38 @@ impl<'a> StoreOpener<'a> { mode: Mode, metadata: DbMetadata, ) -> Result { - let snapshot = self.apply_migrations(mode, metadata)?; + let snapshots = self.apply_migrations(mode, metadata)?; tracing::info!(target: "near", path=%self.path().display(), "Opening an existing RocksDB database"); - let (storage, metadata) = self.open_storage(mode, DB_VERSION)?; - self.ensure_kind(&storage, metadata)?; - snapshot.remove()?; + let (storage, hot_meta, cold_meta) = self.open_storage(mode, DB_VERSION)?; + if let Some(_cold_meta) = cold_meta { + assert!(cfg!(feature = "cold_store")); + // open_storage has verified this. 
+ #[cfg(feature = "cold_store")] + assert_eq!(Some(DbKind::Hot), hot_meta.kind); + #[cfg(feature = "cold_store")] + assert_eq!(Some(DbKind::Cold), _cold_meta.kind); + } else { + self.ensure_kind(&storage, hot_meta)?; + } + snapshots.0.remove()?; + snapshots.1.remove()?; Ok(storage) } - /// Makes sure that database’s kind + /// Makes sure that database’s kind is correct. fn ensure_kind( &self, storage: &NodeStorage, metadata: DbMetadata, - ) -> Result { + ) -> Result<(), StoreOpenerError> { let expected = match self.expected_kind { Some(kind) => kind, - None => return Ok(metadata.kind.unwrap()), + None => return Ok(()), }; if expected == metadata.kind.unwrap() { - return Ok(expected); + return Ok(()); } if expected == DbKind::RPC { @@ -243,14 +323,15 @@ impl<'a> StoreOpener<'a> { DbMetadata { version: metadata.version, kind: self.expected_kind }, )?; } - return Ok(DbKind::Archive); + Ok(()) } fn open_new(&self) -> Result { tracing::info!(target: "near", path=%self.path().display(), "Creating a new RocksDB database"); - let db = self.db.create()?; - let storage = NodeStorage::new(std::sync::Arc::new(db)); + let hot = self.hot.create()?; + let cold = self.cold.as_ref().map(|db| db.create()).transpose()?; + let storage = NodeStorage::from_rocksdb(hot, cold); set_store_metadata( &storage, DbMetadata { version: DB_VERSION, kind: self.expected_kind.or(Some(DbKind::RPC)) }, @@ -263,9 +344,9 @@ impl<'a> StoreOpener<'a> { &self, mode: Mode, metadata: DbMetadata, - ) -> Result { + ) -> Result<(Snapshot, Snapshot), StoreOpenerError> { if metadata.version == DB_VERSION { - return Ok(Snapshot::none()); + return Ok((Snapshot::none(), Snapshot::none())); } else if metadata.version > DB_VERSION { return Err(StoreOpenerError::DbVersionTooNew { got: metadata.version, @@ -296,7 +377,11 @@ impl<'a> StoreOpener<'a> { }); } - let snapshot = Snapshot::new(&self.db.path, &self.db.config)?; + let hot_snapshot = self.hot.snapshot()?; + let cold_snapshot = match self.cold { + None => 
Snapshot::none(), + Some(ref opener) => opener.snapshot()?, + }; for version in metadata.version..DB_VERSION { tracing::info!(target: "near", path=%self.path().display(), @@ -314,16 +399,48 @@ impl<'a> StoreOpener<'a> { set_store_version(&storage, 10000)?; } - Ok(snapshot) + Ok((hot_snapshot, cold_snapshot)) } fn open_storage( &self, mode: Mode, want_version: DbVersion, - ) -> std::io::Result<(NodeStorage, DbMetadata)> { - let (db, metadata) = self.db.open(mode, want_version)?; - Ok((NodeStorage::new(std::sync::Arc::new(db)), metadata)) + ) -> std::io::Result<(NodeStorage, DbMetadata, Option)> { + let (hot, hot_meta) = self.hot.open(mode, want_version)?; + let (cold, cold_meta) = + match self.cold.as_ref().map(|opener| opener.open(mode, want_version)).transpose()? { + None => (None, None), + Some((db, meta)) => (Some(db), Some(meta)), + }; + + // Those are mostly sanity checks. If any of those conditions fails + // than either there’s bug in code or someone does something weird on + // the file system and tries to switch databases under us. + if let Some(_cold_meta) = cold_meta { + #[cfg(feature = "cold_store")] + if hot_meta.kind != Some(DbKind::Hot) { + Err((hot_meta.kind, "Hot")) + } else if _cold_meta.kind != Some(DbKind::Cold) { + Err((_cold_meta.kind, "Cold")) + } else { + Ok(()) + } + #[cfg(not(feature = "cold_store"))] + Ok(()) + } else if matches!(hot_meta.kind, None | Some(DbKind::RPC | DbKind::Archive)) { + Ok(()) + } else { + Err((hot_meta.kind, "RPC or Archive")) + } + .map_err(|(got, want)| { + std::io::Error::new( + std::io::ErrorKind::Other, + format!("unexpected DbKind {got:?}; expected {want}"), + ) + })?; + + Ok((NodeStorage::from_rocksdb(hot, cold), hot_meta, cold_meta)) } } @@ -332,9 +449,9 @@ impl<'a> DBOpener<'a> { /// /// The path to the database is resolved based on the path in config with /// given home_dir as base directory for resolving relative paths. 
- fn new(home_dir: &std::path::Path, config: &'a StoreConfig) -> Self { + fn new(home_dir: &std::path::Path, config: &'a StoreConfig, default_path: &str) -> Self { let path = - home_dir.join(config.path.as_deref().unwrap_or(std::path::Path::new(STORE_PATH))); + home_dir.join(config.path.as_deref().unwrap_or(std::path::Path::new(default_path))); Self { path, config } } @@ -381,6 +498,11 @@ impl<'a> DBOpener<'a> { fn create(&self) -> std::io::Result { RocksDB::open(&self.path, &self.config, Mode::Create) } + + /// Creates a new snapshot for the database. + fn snapshot(&self) -> Result { + Snapshot::new(&self.path, &self.config) + } } pub trait StoreMigrator { @@ -406,3 +528,51 @@ pub trait StoreMigrator { /// equal to [`DB_VERSION`]. fn migrate(&self, storage: &NodeStorage, version: DbVersion) -> anyhow::Result<()>; } + +// This is only here to make conditional compilation simpler. Once cold_store +// feature is stabilised, get rid of it and use DBOpener directly. +use cold_db_opener::ColdDBOpener; + +#[cfg(feature = "cold_store")] +mod cold_db_opener { + pub(super) type ColdDBOpener<'a> = super::DBOpener<'a>; +} + +#[cfg(not(feature = "cold_store"))] +mod cold_db_opener { + use super::*; + + pub(super) enum OpenerImpl {} + + impl OpenerImpl { + pub(super) fn new( + _home_dir: &std::path::Path, + _config: std::convert::Infallible, + _default_path: &str, + ) -> Self { + unreachable!() + } + + pub(super) fn get_metadata(&self) -> std::io::Result> { + unreachable!() + } + + pub(super) fn open( + &self, + _mode: Mode, + _want_version: DbVersion, + ) -> std::io::Result<(std::convert::Infallible, DbMetadata)> { + unreachable!() + } + + pub(super) fn create(&self) -> std::io::Result { + unreachable!() + } + + pub(super) fn snapshot(&self) -> Result { + Ok(Snapshot::none()) + } + } + + pub(super) type ColdDBOpener<'a> = OpenerImpl; +} diff --git a/genesis-tools/genesis-populate/src/main.rs b/genesis-tools/genesis-populate/src/main.rs index 2260776d0b5..a69037aad1b 100644 --- 
a/genesis-tools/genesis-populate/src/main.rs +++ b/genesis-tools/genesis-populate/src/main.rs @@ -25,7 +25,7 @@ fn main() { let near_config = load_config(home_dir, GenesisValidationMode::Full) .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); - let store = near_store::NodeStorage::opener(home_dir, &near_config.config.store) + let store = near_store::NodeStorage::opener(home_dir, &near_config.config.store, None) .open() .unwrap() .get_store(near_store::Temperature::Hot); diff --git a/genesis-tools/genesis-populate/src/state_dump.rs b/genesis-tools/genesis-populate/src/state_dump.rs index a27deabc4d8..79a890fdaab 100644 --- a/genesis-tools/genesis-populate/src/state_dump.rs +++ b/genesis-tools/genesis-populate/src/state_dump.rs @@ -20,7 +20,9 @@ impl StateDump { let storage = TestDB::new(); near_store::NodeStorage::new(storage) } else { - near_store::NodeStorage::opener(store_home_dir, &Default::default()).open().unwrap() + near_store::NodeStorage::opener(store_home_dir, &Default::default(), None) + .open() + .unwrap() }; let store = node_storage.get_store(Temperature::Hot); let state_file = dir.join(STATE_DUMP_FILE); diff --git a/nearcore/Cargo.toml b/nearcore/Cargo.toml index e99207599f3..96f21f355f3 100644 --- a/nearcore/Cargo.toml +++ b/nearcore/Cargo.toml @@ -138,3 +138,5 @@ sandbox = [ io_trace = ["near-vm-runner/io_trace"] shardnet = ["near-network/shardnet"] + +cold_store = ["near-store/cold_store"] diff --git a/nearcore/benches/store.rs b/nearcore/benches/store.rs index 4ba25551775..81e4776f60f 100644 --- a/nearcore/benches/store.rs +++ b/nearcore/benches/store.rs @@ -28,7 +28,7 @@ fn read_trie_items(bench: &mut Bencher, shard_id: usize, mode: Mode) { bench.iter(move || { tracing::info!(target: "neard", "{:?}", home_dir); - let store = near_store::NodeStorage::opener(&home_dir, &near_config.config.store) + let store = near_store::NodeStorage::opener(&home_dir, &near_config.config.store, None) .open_in_mode(mode) .unwrap() 
.get_store(Temperature::Hot); diff --git a/nearcore/src/config.rs b/nearcore/src/config.rs index 45adc4c62e2..cd304cc8484 100644 --- a/nearcore/src/config.rs +++ b/nearcore/src/config.rs @@ -321,8 +321,12 @@ pub struct Config { /// If set, overrides value in genesis configuration. #[serde(skip_serializing_if = "Option::is_none")] pub max_gas_burnt_view: Option, - /// Different parameters to configure/optimize underlying storage. + /// Different parameters to configure underlying storage. pub store: near_store::StoreConfig, + /// Different parameters to configure underlying cold storage. + #[cfg(feature = "cold_store")] + #[serde(default, skip_serializing_if = "Option::is_none")] + pub cold_store: Option, // TODO(mina86): Remove those two altogether at some point. We need to be // somewhat careful though and make sure that we don’t start silently @@ -368,6 +372,8 @@ impl Default for Config { db_migration_snapshot_path: None, use_db_migration_snapshot: None, store: near_store::StoreConfig::default(), + #[cfg(feature = "cold_store")] + cold_store: None, } } } diff --git a/nearcore/src/lib.rs b/nearcore/src/lib.rs index fcde2590e5a..8368a1cbc48 100644 --- a/nearcore/src/lib.rs +++ b/nearcore/src/lib.rs @@ -54,9 +54,16 @@ pub fn get_default_home() -> PathBuf { /// being opened. 
fn open_storage(home_dir: &Path, near_config: &mut NearConfig) -> anyhow::Result { let migrator = migrations::Migrator::new(near_config); - let opener = NodeStorage::opener(home_dir, &near_config.config.store) - .with_migrator(&migrator) - .expect_archive(near_config.client_config.archive); + let opener = NodeStorage::opener( + home_dir, + &near_config.config.store, + #[cfg(feature = "cold_store")] + near_config.config.cold_store.as_ref(), + #[cfg(not(feature = "cold_store"))] + None, + ) + .with_migrator(&migrator) + .expect_archive(near_config.client_config.archive); let storage = match opener.open() { Ok(storage) => Ok(storage), Err(StoreOpenerError::IO(err)) => { @@ -66,6 +73,23 @@ fn open_storage(home_dir: &Path, near_config: &mut NearConfig) -> anyhow::Result Err(StoreOpenerError::DbDoesNotExist) => unreachable!(), // Cannot happen with Mode::ReadWrite Err(StoreOpenerError::DbAlreadyExists) => unreachable!(), + Err(StoreOpenerError::HotColdExistenceMismatch) => { + Err(anyhow::anyhow!( + "Hot and cold databases must either both exist or both not exist.\n\ + Note that at this moment it’s not possible to convert and RPC or legacy archive database into split hot+cold database.\n\ + To set up node in that configuration, start with neither of the databases existing.", + )) + }, + Err(err @ StoreOpenerError::HotColdVersionMismatch { .. 
}) => { + Err(anyhow::anyhow!("{err}")) + }, + Err(StoreOpenerError::DbKindMismatch { which, got, want }) => { + Err(if let Some(got) = got { + anyhow::anyhow!("{which} database kind should be {want} but got {got}") + } else { + anyhow::anyhow!("{which} database kind should be {want} but none was set") + }) + } Err(StoreOpenerError::SnapshotAlreadyExists(snap_path)) => { Err(anyhow::anyhow!( "Detected an existing database migration snapshot at ‘{}’.\n\ @@ -259,7 +283,7 @@ pub fn recompress_storage(home_dir: &Path, opts: RecompressOpts) -> anyhow::Resu skip_columns.push(DBCol::TrieChanges); } - let src_opener = NodeStorage::opener(home_dir, &config.store); + let src_opener = NodeStorage::opener(home_dir, &config.store, None); let src_path = src_opener.path(); let mut dst_config = config.store.clone(); @@ -267,7 +291,7 @@ pub fn recompress_storage(home_dir: &Path, opts: RecompressOpts) -> anyhow::Resu // Note: opts.dest_dir is resolved relative to current working directory // (since it’s a command line option) which is why we set home to cwd. 
let cwd = std::env::current_dir()?; - let dst_opener = NodeStorage::opener(&cwd, &dst_config); + let dst_opener = NodeStorage::opener(&cwd, &dst_config, None); let dst_path = dst_opener.path(); info!(target: "recompress", diff --git a/neard/Cargo.toml b/neard/Cargo.toml index ffd66616724..54d76f808d6 100644 --- a/neard/Cargo.toml +++ b/neard/Cargo.toml @@ -59,6 +59,7 @@ rosetta_rpc = ["nearcore/rosetta_rpc"] json_rpc = ["nearcore/json_rpc"] protocol_feature_fix_staking_threshold = ["nearcore/protocol_feature_fix_staking_threshold"] protocol_feature_flat_state = ["nearcore/protocol_feature_flat_state"] +cold_store = ["nearcore/cold_store", "near-store/cold_store"] nightly = [ "nightly_protocol", diff --git a/runtime/runtime-params-estimator/src/main.rs b/runtime/runtime-params-estimator/src/main.rs index ef6e979652f..55f6d50b884 100644 --- a/runtime/runtime-params-estimator/src/main.rs +++ b/runtime/runtime-params-estimator/src/main.rs @@ -156,10 +156,11 @@ fn main() -> anyhow::Result<()> { let near_config = nearcore::load_config(&state_dump_path, GenesisValidationMode::Full) .context("Error loading config")?; - let store = near_store::NodeStorage::opener(&state_dump_path, &near_config.config.store) - .open() - .unwrap() - .get_store(near_store::Temperature::Hot); + let store = + near_store::NodeStorage::opener(&state_dump_path, &near_config.config.store, None) + .open() + .unwrap() + .get_store(near_store::Temperature::Hot); GenesisBuilder::from_config_and_store(&state_dump_path, near_config, store) .add_additional_accounts(cli_args.additional_accounts_num) .add_additional_accounts_contract(contract_code.to_vec()) diff --git a/test-utils/store-validator/src/main.rs b/test-utils/store-validator/src/main.rs index e58e6358a6a..9acfb61590c 100644 --- a/test-utils/store-validator/src/main.rs +++ b/test-utils/store-validator/src/main.rs @@ -30,7 +30,7 @@ fn main() { let near_config = load_config(home_dir, GenesisValidationMode::Full) .unwrap_or_else(|e| panic!("Error 
loading config: {:#}", e)); - let store = near_store::NodeStorage::opener(home_dir, &near_config.config.store) + let store = near_store::NodeStorage::opener(home_dir, &near_config.config.store, None) .open() .unwrap() .get_store(near_store::Temperature::Hot); diff --git a/tools/mirror/src/lib.rs b/tools/mirror/src/lib.rs index 079ffe74472..78e2996b44d 100644 --- a/tools/mirror/src/lib.rs +++ b/tools/mirror/src/lib.rs @@ -134,8 +134,9 @@ struct TxMirror { } fn open_db>(home: P, config: &NearConfig) -> anyhow::Result { - let db_path = - near_store::NodeStorage::opener(home.as_ref(), &config.config.store).path().join("mirror"); + let db_path = near_store::NodeStorage::opener(home.as_ref(), &config.config.store, None) + .path() + .join("mirror"); let mut options = rocksdb::Options::default(); options.create_missing_column_families(true); options.create_if_missing(true); diff --git a/tools/mock-node/src/setup.rs b/tools/mock-node/src/setup.rs index 5263ebf58ad..26124192fc4 100644 --- a/tools/mock-node/src/setup.rs +++ b/tools/mock-node/src/setup.rs @@ -33,7 +33,7 @@ fn setup_runtime( let store = if in_memory_storage { create_test_store() } else { - near_store::NodeStorage::opener(home_dir, &config.config.store) + near_store::NodeStorage::opener(home_dir, &config.config.store, None) .open() .unwrap() .get_store(near_store::Temperature::Hot) diff --git a/tools/state-viewer/src/cli.rs b/tools/state-viewer/src/cli.rs index 472d6c65907..968a475ed0f 100644 --- a/tools/state-viewer/src/cli.rs +++ b/tools/state-viewer/src/cli.rs @@ -76,7 +76,8 @@ impl StateViewerSubCommand { pub fn run(self, home_dir: &Path, genesis_validation: GenesisValidationMode, mode: Mode) { let near_config = load_config(home_dir, genesis_validation) .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); - let store_opener = near_store::NodeStorage::opener(home_dir, &near_config.config.store); + let store_opener = + near_store::NodeStorage::opener(home_dir, &near_config.config.store, None); let 
store = store_opener.open_in_mode(mode).unwrap(); let hot = store.get_store(near_store::Temperature::Hot); match self { From 390f52b963f7b85ea72373573718176401cf4522 Mon Sep 17 00:00:00 2001 From: pompon0 Date: Tue, 25 Oct 2022 14:01:05 +0200 Subject: [PATCH 021/103] moved Network(View)Client(Messages/Responses) to near_client (#7908) These actix messages are an implementation detail of near_client crate. --- chain/client/src/adapter.rs | 109 +++++++++++++- chain/client/src/client.rs | 5 +- chain/client/src/client_actor.rs | 4 +- chain/client/src/test_utils.rs | 16 ++- chain/client/src/tests/bug_repros.rs | 4 +- chain/client/src/tests/catching_up.rs | 5 +- chain/client/src/tests/consensus.rs | 5 +- chain/client/src/tests/cross_shard_tx.rs | 4 +- chain/client/src/tests/query_client.rs | 9 +- chain/client/src/view_client.rs | 6 +- chain/jsonrpc/src/lib.rs | 12 +- chain/network/src/types.rs | 135 ++---------------- chain/rosetta-rpc/src/lib.rs | 8 +- .../src/tests/client/chunks_management.rs | 3 +- .../access_key_nonce_for_implicit_accounts.rs | 5 +- .../account_id_in_function_call_permission.rs | 2 +- .../src/tests/client/process_blocks.rs | 5 +- .../src/tests/nearcore/stake_nodes.rs | 2 +- .../src/tests/nearcore/sync_nodes.rs | 2 +- tools/mirror/src/lib.rs | 2 +- tools/mock-node/src/lib.rs | 5 +- tools/mock-node/src/setup.rs | 5 +- tools/state-viewer/src/apply_chunk.rs | 2 +- 23 files changed, 177 insertions(+), 178 deletions(-) diff --git a/chain/client/src/adapter.rs b/chain/client/src/adapter.rs index de2c1f818a4..d53a4d371dd 100644 --- a/chain/client/src/adapter.rs +++ b/chain/client/src/adapter.rs @@ -2,13 +2,13 @@ use crate::client_actor::ClientActor; use crate::view_client::ViewClientActor; use near_network::time; use near_network::types::{ - NetworkClientMessages, NetworkClientResponses, NetworkInfo, NetworkViewClientMessages, - NetworkViewClientResponses, PartialEncodedChunkForwardMsg, PartialEncodedChunkRequestMsg, + NetworkInfo, 
PartialEncodedChunkForwardMsg, PartialEncodedChunkRequestMsg, PartialEncodedChunkResponseMsg, ReasonForBan, StateResponseInfo, }; use near_o11y::WithSpanContextExt; use near_primitives::block::{Approval, Block, BlockHeader}; use near_primitives::challenge::Challenge; +use near_primitives::errors::InvalidTxError; use near_primitives::hash::CryptoHash; use near_primitives::network::{AnnounceAccount, PeerId}; use near_primitives::sharding::PartialEncodedChunk; @@ -16,6 +16,111 @@ use near_primitives::transaction::SignedTransaction; use near_primitives::types::{AccountId, EpochId, ShardId}; use near_primitives::views::FinalExecutionOutcomeView; +#[derive(actix::Message, Debug, strum::AsRefStr, strum::IntoStaticStr)] +// TODO(#1313): Use Box +#[allow(clippy::large_enum_variant)] +#[rtype(result = "NetworkClientResponses")] +pub enum NetworkClientMessages { + #[cfg(feature = "test_features")] + Adversarial(near_network::types::NetworkAdversarialMessage), + + /// Received transaction. + Transaction { + transaction: SignedTransaction, + /// Whether the transaction is forwarded from other nodes. + is_forwarded: bool, + /// Whether the transaction needs to be submitted. + check_only: bool, + }, + /// Received block, possibly requested. + Block(Block, PeerId, bool), + /// Received list of headers for syncing. + BlockHeaders(Vec, PeerId), + /// Block approval. + BlockApproval(Approval, PeerId), + /// State response. + StateResponse(StateResponseInfo), + + /// Request chunk parts and/or receipts. + PartialEncodedChunkRequest(PartialEncodedChunkRequestMsg, CryptoHash), + /// Response to a request for chunk parts and/or receipts. 
+ PartialEncodedChunkResponse(PartialEncodedChunkResponseMsg, std::time::Instant), + /// Information about chunk such as its header, some subset of parts and/or incoming receipts + PartialEncodedChunk(PartialEncodedChunk), + /// Forwarding parts to those tracking the shard (so they don't need to send requests) + PartialEncodedChunkForward(PartialEncodedChunkForwardMsg), + + /// A challenge to invalidate the block. + Challenge(Challenge), + + NetworkInfo(NetworkInfo), +} + +// TODO(#1313): Use Box +#[derive(Eq, PartialEq, Debug, actix::MessageResponse)] +#[allow(clippy::large_enum_variant)] +pub enum NetworkClientResponses { + /// Adv controls. + #[cfg(feature = "test_features")] + AdvResult(u64), + + /// No response. + NoResponse, + /// Valid transaction inserted into mempool as response to Transaction. + ValidTx, + /// Invalid transaction inserted into mempool as response to Transaction. + InvalidTx(InvalidTxError), + /// The request is routed to other shards + RequestRouted, + /// The node being queried does not track the shard needed and therefore cannot provide userful + /// response. + DoesNotTrackShard, + /// Ban peer for malicious behavior. + Ban { ban_reason: ReasonForBan }, +} + +#[derive(actix::Message, strum::IntoStaticStr)] +#[rtype(result = "NetworkViewClientResponses")] +pub enum NetworkViewClientMessages { + #[cfg(feature = "test_features")] + Adversarial(near_network::types::NetworkAdversarialMessage), + + /// Transaction status query + TxStatus { tx_hash: CryptoHash, signer_account_id: AccountId }, + /// Transaction status response + TxStatusResponse(Box), + /// Request a block. + BlockRequest(CryptoHash), + /// Request headers. + BlockHeadersRequest(Vec), + /// State request header. + StateRequestHeader { shard_id: ShardId, sync_hash: CryptoHash }, + /// State request part. + StateRequestPart { shard_id: ShardId, sync_hash: CryptoHash, part_id: u64 }, + /// Account announcements that needs to be validated before being processed. 
+ /// They are paired with last epoch id known to this announcement, in order to accept only + /// newer announcements. + AnnounceAccount(Vec<(AnnounceAccount, Option)>), +} + +#[derive(Debug, actix::MessageResponse)] +pub enum NetworkViewClientResponses { + /// Transaction execution outcome + TxStatus(Box), + /// Block response. + Block(Box), + /// Headers response. + BlockHeaders(Vec), + /// Response to state request. + StateResponse(Box), + /// Valid announce accounts. + AnnounceAccount(Vec), + /// Ban peer for malicious behavior. + Ban { ban_reason: ReasonForBan }, + /// Response not needed + NoResponse, +} + pub struct Adapter { /// Address of the client actor. client_addr: actix::Addr, diff --git a/chain/client/src/client.rs b/chain/client/src/client.rs index dc9c159f85d..e54a8a2fccd 100644 --- a/chain/client/src/client.rs +++ b/chain/client/src/client.rs @@ -14,6 +14,7 @@ use near_client_primitives::debug::ChunkProduction; use near_primitives::time::Clock; use tracing::{debug, error, info, trace, warn}; +use crate::adapter::NetworkClientResponses; use near_chain::chain::{ ApplyStatePartsRequest, BlockCatchUpRequest, BlockMissingChunks, BlocksCatchUpState, OrphanMissingChunks, StateSplitRequest, TX_ROUTING_HEIGHT_HORIZON, @@ -26,9 +27,7 @@ use near_chain::{ }; use near_chain_configs::ClientConfig; use near_chunks::ShardsManager; -use near_network::types::{ - FullPeerInfo, NetworkClientResponses, NetworkRequests, PeerManagerAdapter, -}; +use near_network::types::{FullPeerInfo, NetworkRequests, PeerManagerAdapter}; use near_primitives::block::{Approval, ApprovalInner, ApprovalMessage, Block, BlockHeader, Tip}; use near_primitives::challenge::{Challenge, ChallengeBody}; use near_primitives::hash::CryptoHash; diff --git a/chain/client/src/client_actor.rs b/chain/client/src/client_actor.rs index 11aad2cb667..ce9d35df40d 100644 --- a/chain/client/src/client_actor.rs +++ b/chain/client/src/client_actor.rs @@ -1,5 +1,6 @@ //! 
Client actor orchestrates Client and facilitates network connection. +use crate::adapter::{NetworkClientMessages, NetworkClientResponses}; use crate::client::{Client, EPOCH_START_INFO_BLOCKS}; use crate::info::{ display_sync_status, get_validator_epoch_stats, InfoHelper, ValidatorInfoHelper, @@ -32,8 +33,7 @@ use near_client_primitives::types::{ }; use near_network::types::ReasonForBan; use near_network::types::{ - NetworkClientMessages, NetworkClientResponses, NetworkInfo, NetworkRequests, - PeerManagerAdapter, PeerManagerMessageRequest, + NetworkInfo, NetworkRequests, PeerManagerAdapter, PeerManagerMessageRequest, }; use near_o11y::{handler_debug_span, OpenTelemetrySpanExt, WithSpanContext, WithSpanContextExt}; use near_performance_metrics; diff --git a/chain/client/src/test_utils.rs b/chain/client/src/test_utils.rs index 68dbb243159..ee176ef878d 100644 --- a/chain/client/src/test_utils.rs +++ b/chain/client/src/test_utils.rs @@ -13,6 +13,11 @@ use once_cell::sync::OnceCell; use rand::{thread_rng, Rng}; use tracing::info; +use crate::adapter::{ + NetworkClientMessages, NetworkClientResponses, NetworkViewClientMessages, + NetworkViewClientResponses, +}; +use crate::{start_view_client, Client, ClientActor, SyncStatus, ViewClientActor}; use near_chain::chain::{do_apply_chunks, BlockCatchUpRequest, StateSplitRequest}; use near_chain::test_utils::{ wait_for_all_blocks_in_processing, wait_for_block_in_processing, KeyValueRuntime, @@ -29,13 +34,12 @@ use near_crypto::{InMemorySigner, KeyType, PublicKey}; use near_network::test_utils::MockPeerManagerAdapter; use near_network::types::PartialEdgeInfo; use near_network::types::{ - AccountOrPeerIdOrHash, NetworkViewClientMessages, NetworkViewClientResponses, - PartialEncodedChunkRequestMsg, PartialEncodedChunkResponseMsg, PeerChainInfoV2, PeerInfo, - PeerType, + AccountOrPeerIdOrHash, PartialEncodedChunkRequestMsg, PartialEncodedChunkResponseMsg, + PeerChainInfoV2, PeerInfo, PeerType, }; use near_network::types::{ - 
ConnectedPeerInfo, FullPeerInfo, NetworkClientMessages, NetworkClientResponses, - NetworkRecipient, NetworkRequests, NetworkResponses, PeerManagerAdapter, + ConnectedPeerInfo, FullPeerInfo, NetworkRecipient, NetworkRequests, NetworkResponses, + PeerManagerAdapter, }; use near_network::types::{ NetworkInfo, PeerManagerMessageRequest, PeerManagerMessageResponse, SetChainInfo, @@ -67,8 +71,6 @@ use near_store::test_utils::create_test_store; use near_store::Store; use near_telemetry::TelemetryActor; -use crate::{start_view_client, Client, ClientActor, SyncStatus, ViewClientActor}; - pub struct PeerManagerMock { handle: Box< dyn FnMut( diff --git a/chain/client/src/tests/bug_repros.rs b/chain/client/src/tests/bug_repros.rs index 6ece19feabc..adde73db03a 100644 --- a/chain/client/src/tests/bug_repros.rs +++ b/chain/client/src/tests/bug_repros.rs @@ -9,6 +9,7 @@ use actix::{Addr, System}; use futures::FutureExt; use rand::{thread_rng, Rng}; +use crate::adapter::NetworkClientMessages; use crate::test_utils::setup_mock_all_validators; use crate::{ClientActor, GetBlock, ViewClientActor}; use near_actix_test_utils::run_actix; @@ -17,8 +18,7 @@ use near_crypto::{InMemorySigner, KeyType}; use near_network::types::NetworkRequests::PartialEncodedChunkMessage; use near_network::types::PeerInfo; use near_network::types::{ - NetworkClientMessages, NetworkRequests, NetworkResponses, PeerManagerMessageRequest, - PeerManagerMessageResponse, + NetworkRequests, NetworkResponses, PeerManagerMessageRequest, PeerManagerMessageResponse, }; use near_o11y::testonly::init_test_logger; use near_o11y::WithSpanContextExt; diff --git a/chain/client/src/tests/catching_up.rs b/chain/client/src/tests/catching_up.rs index 5decf2bde0f..300c08bb207 100644 --- a/chain/client/src/tests/catching_up.rs +++ b/chain/client/src/tests/catching_up.rs @@ -6,6 +6,7 @@ use actix::{Addr, System}; use borsh::{BorshDeserialize, BorshSerialize}; use futures::{future, FutureExt}; +use 
crate::adapter::NetworkClientMessages; use crate::test_utils::setup_mock_all_validators; use crate::{ClientActor, Query, ViewClientActor}; use near_actix_test_utils::run_actix; @@ -13,9 +14,7 @@ use near_chain::test_utils::{account_id_to_shard_id, ValidatorSchedule}; use near_chain_configs::TEST_STATE_SYNC_TIMEOUT; use near_crypto::{InMemorySigner, KeyType}; use near_network::types::{AccountIdOrPeerTrackingShard, AccountOrPeerIdOrHash, PeerInfo}; -use near_network::types::{ - NetworkClientMessages, NetworkRequests, NetworkResponses, PeerManagerMessageRequest, -}; +use near_network::types::{NetworkRequests, NetworkResponses, PeerManagerMessageRequest}; use near_o11y::testonly::init_integration_logger; use near_o11y::WithSpanContextExt; use near_primitives::hash::{hash as hash_func, CryptoHash}; diff --git a/chain/client/src/tests/consensus.rs b/chain/client/src/tests/consensus.rs index d1b9fd7a3c6..80f0792a435 100644 --- a/chain/client/src/tests/consensus.rs +++ b/chain/client/src/tests/consensus.rs @@ -5,14 +5,13 @@ use actix::{Addr, System}; use near_chain::test_utils::ValidatorSchedule; use rand::{thread_rng, Rng}; +use crate::adapter::NetworkClientMessages; use crate::test_utils::setup_mock_all_validators; use crate::{ClientActor, ViewClientActor}; use near_actix_test_utils::run_actix; use near_chain::Block; use near_network::types::PeerInfo; -use near_network::types::{ - NetworkClientMessages, NetworkRequests, NetworkResponses, PeerManagerMessageRequest, -}; +use near_network::types::{NetworkRequests, NetworkResponses, PeerManagerMessageRequest}; use near_o11y::testonly::init_integration_logger; use near_o11y::WithSpanContextExt; use near_primitives::block::{Approval, ApprovalInner}; diff --git a/chain/client/src/tests/cross_shard_tx.rs b/chain/client/src/tests/cross_shard_tx.rs index 5632f1d961d..16611caac23 100644 --- a/chain/client/src/tests/cross_shard_tx.rs +++ b/chain/client/src/tests/cross_shard_tx.rs @@ -7,13 +7,13 @@ use std::sync::{Arc, RwLock}; use 
actix::{Addr, MailboxError, System}; use futures::{future, FutureExt}; +use crate::adapter::{NetworkClientMessages, NetworkClientResponses}; use near_actix_test_utils::run_actix; use near_chain::test_utils::{account_id_to_shard_id, ValidatorSchedule}; use near_crypto::{InMemorySigner, KeyType}; use near_network::types::PeerInfo; use near_network::types::{ - NetworkClientMessages, NetworkClientResponses, NetworkResponses, PeerManagerMessageRequest, - PeerManagerMessageResponse, + NetworkResponses, PeerManagerMessageRequest, PeerManagerMessageResponse, }; use near_o11y::testonly::init_integration_logger; use near_o11y::WithSpanContextExt; diff --git a/chain/client/src/tests/query_client.rs b/chain/client/src/tests/query_client.rs index c4aa0b40561..bde0c3444e5 100644 --- a/chain/client/src/tests/query_client.rs +++ b/chain/client/src/tests/query_client.rs @@ -5,6 +5,10 @@ use near_primitives::merkle::PartialMerkleTree; use std::sync::Arc; use std::time::Duration; +use crate::adapter::{ + NetworkClientMessages, NetworkClientResponses, NetworkViewClientMessages, + NetworkViewClientResponses, +}; use crate::test_utils::{setup_mock_all_validators, setup_no_network, setup_only_view}; use crate::{ GetBlock, GetBlockWithMerkleTree, GetExecutionOutcomesForBlock, Query, QueryError, Status, @@ -14,11 +18,10 @@ use near_actix_test_utils::run_actix; use near_chain_configs::DEFAULT_GC_NUM_EPOCHS_TO_KEEP; use near_crypto::{InMemorySigner, KeyType}; use near_network::test_utils::MockPeerManagerAdapter; +use near_network::types::PeerInfo; use near_network::types::{ - NetworkClientMessages, NetworkClientResponses, NetworkRequests, NetworkResponses, - PeerManagerMessageRequest, PeerManagerMessageResponse, + NetworkRequests, NetworkResponses, PeerManagerMessageRequest, PeerManagerMessageResponse, }; -use near_network::types::{NetworkViewClientMessages, NetworkViewClientResponses, PeerInfo}; use near_o11y::testonly::init_test_logger; use near_o11y::WithSpanContextExt; diff --git 
a/chain/client/src/view_client.rs b/chain/client/src/view_client.rs index 6458a211e76..dde48aedd87 100644 --- a/chain/client/src/view_client.rs +++ b/chain/client/src/view_client.rs @@ -12,6 +12,7 @@ use std::time::{Duration, Instant}; use tracing::{debug, error, info, trace, warn}; +use crate::adapter::{NetworkViewClientMessages, NetworkViewClientResponses}; use near_chain::{ get_epoch_block_producers_view, Chain, ChainGenesis, ChainStoreAccess, DoomslugThresholdMode, RuntimeAdapter, @@ -28,9 +29,8 @@ use near_client_primitives::types::{ #[cfg(feature = "test_features")] use near_network::types::NetworkAdversarialMessage; use near_network::types::{ - NetworkRequests, NetworkViewClientMessages, NetworkViewClientResponses, PeerManagerAdapter, - PeerManagerMessageRequest, ReasonForBan, StateResponseInfo, StateResponseInfoV1, - StateResponseInfoV2, + NetworkRequests, PeerManagerAdapter, PeerManagerMessageRequest, ReasonForBan, + StateResponseInfo, StateResponseInfoV1, StateResponseInfoV2, }; use near_o11y::{handler_debug_span, OpenTelemetrySpanExt, WithSpanContext, WithSpanContextExt}; use near_performance_metrics_macros::perf; diff --git a/chain/jsonrpc/src/lib.rs b/chain/jsonrpc/src/lib.rs index edad2dbed03..20f5261c547 100644 --- a/chain/jsonrpc/src/lib.rs +++ b/chain/jsonrpc/src/lib.rs @@ -17,6 +17,7 @@ use tokio::time::{sleep, timeout}; use tracing::info; use near_chain_configs::GenesisConfig; +use near_client::adapter::{NetworkClientMessages, NetworkClientResponses}; use near_client::{ ClientActor, DebugStatus, GetBlock, GetBlockProof, GetChunk, GetExecutionOutcome, GetGasPrice, GetNetworkInfo, GetNextLightClientBlock, GetProtocolConfig, GetReceipt, GetStateChanges, @@ -27,7 +28,6 @@ pub use near_jsonrpc_client as client; use near_jsonrpc_primitives::errors::RpcError; use near_jsonrpc_primitives::message::{Message, Request}; use near_jsonrpc_primitives::types::config::RpcProtocolConfigResponse; -use near_network::types::{NetworkClientMessages, 
NetworkClientResponses}; use near_o11y::metrics::{prometheus, Encoder, TextEncoder}; use near_primitives::hash::CryptoHash; use near_primitives::transaction::SignedTransaction; @@ -1147,7 +1147,7 @@ impl JsonRpcHandler { actix::spawn( self.client_addr .send( - near_network::types::NetworkClientMessages::Adversarial( + near_client::adapter::NetworkClientMessages::Adversarial( near_network::types::NetworkAdversarialMessage::AdvSetSyncInfo(height), ) .with_span_context(), @@ -1161,7 +1161,7 @@ impl JsonRpcHandler { actix::spawn( self.client_addr .send( - near_network::types::NetworkClientMessages::Adversarial( + near_client::adapter::NetworkClientMessages::Adversarial( near_network::types::NetworkAdversarialMessage::AdvDisableHeaderSync, ) .with_span_context(), @@ -1171,7 +1171,7 @@ impl JsonRpcHandler { actix::spawn( self.view_client_addr .send( - near_network::types::NetworkViewClientMessages::Adversarial( + near_client::adapter::NetworkViewClientMessages::Adversarial( near_network::types::NetworkAdversarialMessage::AdvDisableHeaderSync, ) .with_span_context(), @@ -1195,7 +1195,7 @@ impl JsonRpcHandler { actix::spawn( self.view_client_addr .send( - near_network::types::NetworkViewClientMessages::Adversarial( + near_client::adapter::NetworkViewClientMessages::Adversarial( near_network::types::NetworkAdversarialMessage::AdvDisableDoomslug, ) .with_span_context(), @@ -1237,7 +1237,7 @@ impl JsonRpcHandler { actix::spawn( self.view_client_addr .send( - near_network::types::NetworkViewClientMessages::Adversarial( + near_client::adapter::NetworkViewClientMessages::Adversarial( near_network::types::NetworkAdversarialMessage::AdvSwitchToHeight(height), ) .with_span_context(), diff --git a/chain/network/src/types.rs b/chain/network/src/types.rs index 2f8e9c8be55..b26567cacc0 100644 --- a/chain/network/src/types.rs +++ b/chain/network/src/types.rs @@ -9,16 +9,14 @@ use futures::future::BoxFuture; use futures::FutureExt; use near_crypto::PublicKey; use 
near_o11y::WithSpanContext; -use near_primitives::block::{Approval, ApprovalMessage, Block, BlockHeader}; +use near_primitives::block::{ApprovalMessage, Block}; use near_primitives::challenge::Challenge; -use near_primitives::errors::InvalidTxError; use near_primitives::hash::CryptoHash; use near_primitives::network::{AnnounceAccount, PeerId}; -use near_primitives::sharding::{PartialEncodedChunk, PartialEncodedChunkWithArcReceipts}; +use near_primitives::sharding::PartialEncodedChunkWithArcReceipts; use near_primitives::transaction::SignedTransaction; use near_primitives::types::BlockHeight; use near_primitives::types::{AccountId, EpochId, ShardId}; -use near_primitives::views::FinalExecutionOutcomeView; use near_primitives::views::{KnownProducerView, NetworkInfoView, PeerInfoView}; use once_cell::sync::OnceCell; use std::collections::HashMap; @@ -397,67 +395,17 @@ pub enum NetworkResponses { RouteNotFound, } -#[derive(actix::Message, Debug, strum::AsRefStr, strum::IntoStaticStr)] -// TODO(#1313): Use Box -#[allow(clippy::large_enum_variant)] -#[rtype(result = "NetworkClientResponses")] -pub enum NetworkClientMessages { - #[cfg(feature = "test_features")] - Adversarial(crate::types::NetworkAdversarialMessage), - - /// Received transaction. - Transaction { - transaction: SignedTransaction, - /// Whether the transaction is forwarded from other nodes. - is_forwarded: bool, - /// Whether the transaction needs to be submitted. - check_only: bool, - }, - /// Received block, possibly requested. - Block(Block, PeerId, bool), - /// Received list of headers for syncing. - BlockHeaders(Vec, PeerId), - /// Block approval. - BlockApproval(Approval, PeerId), - /// State response. - StateResponse(StateResponseInfo), - - /// Request chunk parts and/or receipts. - PartialEncodedChunkRequest(PartialEncodedChunkRequestMsg, CryptoHash), - /// Response to a request for chunk parts and/or receipts. 
- PartialEncodedChunkResponse(PartialEncodedChunkResponseMsg, std::time::Instant), - /// Information about chunk such as its header, some subset of parts and/or incoming receipts - PartialEncodedChunk(PartialEncodedChunk), - /// Forwarding parts to those tracking the shard (so they don't need to send requests) - PartialEncodedChunkForward(PartialEncodedChunkForwardMsg), - - /// A challenge to invalidate the block. - Challenge(Challenge), - - NetworkInfo(NetworkInfo), -} - -// TODO(#1313): Use Box -#[derive(Eq, PartialEq, Debug, actix::MessageResponse)] -#[allow(clippy::large_enum_variant)] -pub enum NetworkClientResponses { - /// Adv controls. - #[cfg(feature = "test_features")] - AdvResult(u64), - - /// No response. - NoResponse, - /// Valid transaction inserted into mempool as response to Transaction. - ValidTx, - /// Invalid transaction inserted into mempool as response to Transaction. - InvalidTx(InvalidTxError), - /// The request is routed to other shards - RequestRouted, - /// The node being queried does not track the shard needed and therefore cannot provide userful - /// response. - DoesNotTrackShard, - /// Ban peer for malicious behavior. 
- Ban { ban_reason: ReasonForBan }, +#[cfg(feature = "test_features")] +#[derive(actix::Message, Debug)] +#[rtype(result = "Option")] +pub enum NetworkAdversarialMessage { + AdvProduceBlocks(u64, bool), + AdvSwitchToHeight(u64), + AdvDisableHeaderSync, + AdvDisableDoomslug, + AdvGetSavedBlocks, + AdvCheckStorageConsistency, + AdvSetSyncInfo(u64), } pub trait MsgRecipient: Send + Sync + 'static { @@ -479,7 +427,6 @@ where actix::Addr::do_send(self, msg) } } - pub trait PeerManagerAdapter: MsgRecipient> + MsgRecipient> @@ -542,8 +489,6 @@ mod tests { assert_size!(HandshakeFailureReason); assert_size!(NetworkRequests); assert_size!(NetworkResponses); - assert_size!(NetworkClientMessages); - assert_size!(NetworkClientResponses); assert_size!(Handshake); assert_size!(Ping); assert_size!(Pong); @@ -636,57 +581,3 @@ pub struct AccountIdOrPeerTrackingShard { /// Only send messages to peers whose latest chain height is no less `min_height` pub min_height: BlockHeight, } - -#[derive(actix::Message, strum::IntoStaticStr)] -#[rtype(result = "NetworkViewClientResponses")] -pub enum NetworkViewClientMessages { - #[cfg(feature = "test_features")] - Adversarial(NetworkAdversarialMessage), - - /// Transaction status query - TxStatus { tx_hash: CryptoHash, signer_account_id: AccountId }, - /// Transaction status response - TxStatusResponse(Box), - /// Request a block. - BlockRequest(CryptoHash), - /// Request headers. - BlockHeadersRequest(Vec), - /// State request header. - StateRequestHeader { shard_id: ShardId, sync_hash: CryptoHash }, - /// State request part. - StateRequestPart { shard_id: ShardId, sync_hash: CryptoHash, part_id: u64 }, - /// Account announcements that needs to be validated before being processed. - /// They are paired with last epoch id known to this announcement, in order to accept only - /// newer announcements. 
- AnnounceAccount(Vec<(AnnounceAccount, Option)>), -} - -#[derive(Debug, actix::MessageResponse)] -pub enum NetworkViewClientResponses { - /// Transaction execution outcome - TxStatus(Box), - /// Block response. - Block(Box), - /// Headers response. - BlockHeaders(Vec), - /// Response to state request. - StateResponse(Box), - /// Valid announce accounts. - AnnounceAccount(Vec), - /// Ban peer for malicious behavior. - Ban { ban_reason: ReasonForBan }, - /// Response not needed - NoResponse, -} - -#[cfg(feature = "test_features")] -#[derive(Debug)] -pub enum NetworkAdversarialMessage { - AdvProduceBlocks(u64, bool), - AdvSwitchToHeight(u64), - AdvDisableHeaderSync, - AdvDisableDoomslug, - AdvGetSavedBlocks, - AdvCheckStorageConsistency, - AdvSetSyncInfo(u64), -} diff --git a/chain/rosetta-rpc/src/lib.rs b/chain/rosetta-rpc/src/lib.rs index 7bd425176e1..f7dd91bf7a9 100644 --- a/chain/rosetta-rpc/src/lib.rs +++ b/chain/rosetta-rpc/src/lib.rs @@ -741,7 +741,7 @@ async fn construction_submit( let transaction_hash = signed_transaction.as_ref().get_hash(); let transaction_submittion = client_addr .send( - near_network::types::NetworkClientMessages::Transaction { + near_client::adapter::NetworkClientMessages::Transaction { transaction: signed_transaction.into_inner(), is_forwarded: false, check_only: false, @@ -750,15 +750,15 @@ async fn construction_submit( ) .await?; match transaction_submittion { - near_network::types::NetworkClientResponses::ValidTx - | near_network::types::NetworkClientResponses::RequestRouted => { + near_client::adapter::NetworkClientResponses::ValidTx + | near_client::adapter::NetworkClientResponses::RequestRouted => { Ok(Json(models::TransactionIdentifierResponse { transaction_identifier: models::TransactionIdentifier::transaction( &transaction_hash, ), })) } - near_network::types::NetworkClientResponses::InvalidTx(error) => { + near_client::adapter::NetworkClientResponses::InvalidTx(error) => { 
Err(errors::ErrorKind::InvalidInput(error.to_string()).into()) } _ => Err(errors::ErrorKind::InternalInvariantError(format!( diff --git a/integration-tests/src/tests/client/chunks_management.rs b/integration-tests/src/tests/client/chunks_management.rs index 97b0f5b81c6..905ab84464b 100644 --- a/integration-tests/src/tests/client/chunks_management.rs +++ b/integration-tests/src/tests/client/chunks_management.rs @@ -7,11 +7,12 @@ use near_chunks::{ CHUNK_REQUEST_RETRY_MS, CHUNK_REQUEST_SWITCH_TO_FULL_FETCH_MS, CHUNK_REQUEST_SWITCH_TO_OTHERS_MS, }; +use near_client::adapter::NetworkClientMessages; use near_client::test_utils::setup_mock_all_validators; use near_client::{ClientActor, GetBlock, ViewClientActor}; use near_network::types::PeerManagerMessageRequest; use near_network::types::{AccountIdOrPeerTrackingShard, PeerInfo}; -use near_network::types::{NetworkClientMessages, NetworkRequests, NetworkResponses}; +use near_network::types::{NetworkRequests, NetworkResponses}; use near_o11y::testonly::init_test_logger; use near_o11y::WithSpanContextExt; use near_primitives::hash::CryptoHash; diff --git a/integration-tests/src/tests/client/features/access_key_nonce_for_implicit_accounts.rs b/integration-tests/src/tests/client/features/access_key_nonce_for_implicit_accounts.rs index 5994e491b2c..69ae0899968 100644 --- a/integration-tests/src/tests/client/features/access_key_nonce_for_implicit_accounts.rs +++ b/integration-tests/src/tests/client/features/access_key_nonce_for_implicit_accounts.rs @@ -5,11 +5,10 @@ use assert_matches::assert_matches; use near_chain::chain::NUM_ORPHAN_ANCESTORS_CHECK; use near_chain::{ChainGenesis, Error, Provenance, RuntimeAdapter}; use near_chain_configs::Genesis; +use near_client::adapter::NetworkClientResponses; use near_client::test_utils::{create_chunk_with_transactions, TestEnv}; use near_crypto::{InMemorySigner, KeyType, Signer}; -use near_network::types::{ - MsgRecipient, NetworkClientResponses, NetworkRequests, 
PeerManagerMessageRequest, -}; +use near_network::types::{MsgRecipient, NetworkRequests, PeerManagerMessageRequest}; use near_o11y::testonly::init_test_logger; use near_o11y::WithSpanContextExt; use near_primitives::account::AccessKey; diff --git a/integration-tests/src/tests/client/features/account_id_in_function_call_permission.rs b/integration-tests/src/tests/client/features/account_id_in_function_call_permission.rs index 6c217474c0d..1ff9fcaee4d 100644 --- a/integration-tests/src/tests/client/features/account_id_in_function_call_permission.rs +++ b/integration-tests/src/tests/client/features/account_id_in_function_call_permission.rs @@ -1,8 +1,8 @@ use near_chain::{ChainGenesis, RuntimeAdapter}; use near_chain_configs::Genesis; +use near_client::adapter::NetworkClientResponses; use near_client::test_utils::TestEnv; use near_crypto::{InMemorySigner, KeyType, Signer}; -use near_network::types::NetworkClientResponses; use near_primitives::account::{AccessKey, AccessKeyPermission, FunctionCallPermission}; use near_primitives::errors::{ActionsValidationError, InvalidTxError}; use near_primitives::hash::CryptoHash; diff --git a/integration-tests/src/tests/client/process_blocks.rs b/integration-tests/src/tests/client/process_blocks.rs index 69492fe85cc..db9c9acd1d3 100644 --- a/integration-tests/src/tests/client/process_blocks.rs +++ b/integration-tests/src/tests/client/process_blocks.rs @@ -21,6 +21,7 @@ use near_chain::{ }; use near_chain_configs::{ClientConfig, Genesis, DEFAULT_GC_NUM_EPOCHS_TO_KEEP}; use near_chunks::{ChunkStatus, ShardsManager}; +use near_client::adapter::{NetworkClientMessages, NetworkClientResponses}; use near_client::test_utils::{ create_chunk_on_height, setup_client, setup_mock, setup_mock_all_validators, TestEnv, }; @@ -30,9 +31,7 @@ use near_network::test_utils::{wait_or_panic, MockPeerManagerAdapter}; use near_network::types::{ ConnectedPeerInfo, NetworkInfo, PeerManagerMessageRequest, PeerManagerMessageResponse, }; -use 
near_network::types::{ - FullPeerInfo, NetworkClientMessages, NetworkClientResponses, NetworkRequests, NetworkResponses, -}; +use near_network::types::{FullPeerInfo, NetworkRequests, NetworkResponses}; use near_network::types::{PeerChainInfoV2, PeerInfo, ReasonForBan}; use near_o11y::testonly::{init_integration_logger, init_test_logger}; use near_o11y::WithSpanContextExt; diff --git a/integration-tests/src/tests/nearcore/stake_nodes.rs b/integration-tests/src/tests/nearcore/stake_nodes.rs index 896824c9e0e..f3a68665751 100644 --- a/integration-tests/src/tests/nearcore/stake_nodes.rs +++ b/integration-tests/src/tests/nearcore/stake_nodes.rs @@ -11,10 +11,10 @@ use crate::genesis_helpers::genesis_hash; use crate::test_helpers::heavy_test; use near_actix_test_utils::run_actix; use near_chain_configs::Genesis; +use near_client::adapter::NetworkClientMessages; use near_client::{ClientActor, GetBlock, Query, Status, ViewClientActor}; use near_crypto::{InMemorySigner, KeyType}; use near_network::test_utils::{convert_boot_nodes, open_port, WaitOrTimeoutActor}; -use near_network::types::NetworkClientMessages; use near_o11y::testonly::init_integration_logger; use near_primitives::hash::CryptoHash; use near_primitives::transaction::SignedTransaction; diff --git a/integration-tests/src/tests/nearcore/sync_nodes.rs b/integration-tests/src/tests/nearcore/sync_nodes.rs index 72092b49189..3ba9a81d6d1 100644 --- a/integration-tests/src/tests/nearcore/sync_nodes.rs +++ b/integration-tests/src/tests/nearcore/sync_nodes.rs @@ -10,10 +10,10 @@ use crate::test_helpers::heavy_test; use near_actix_test_utils::run_actix; use near_chain::Block; use near_chain_configs::Genesis; +use near_client::adapter::NetworkClientMessages; use near_client::{ClientActor, GetBlock}; use near_crypto::{InMemorySigner, KeyType}; use near_network::test_utils::{convert_boot_nodes, open_port, WaitOrTimeoutActor}; -use near_network::types::NetworkClientMessages; use near_network::types::PeerInfo; use 
near_o11y::testonly::init_integration_logger; use near_o11y::WithSpanContextExt; diff --git a/tools/mirror/src/lib.rs b/tools/mirror/src/lib.rs index 78e2996b44d..34aa5817125 100644 --- a/tools/mirror/src/lib.rs +++ b/tools/mirror/src/lib.rs @@ -2,6 +2,7 @@ use actix::Addr; use anyhow::Context; use borsh::{BorshDeserialize, BorshSerialize}; use near_chain_configs::GenesisValidationMode; +use near_client::adapter::{NetworkClientMessages, NetworkClientResponses}; use near_client::{ClientActor, ViewClientActor}; use near_client_primitives::types::{ GetBlock, GetBlockError, GetChunk, GetChunkError, GetExecutionOutcome, @@ -9,7 +10,6 @@ use near_client_primitives::types::{ }; use near_crypto::{PublicKey, SecretKey}; use near_indexer::{Indexer, StreamerMessage}; -use near_network::types::{NetworkClientMessages, NetworkClientResponses}; use near_o11y::WithSpanContextExt; use near_primitives::hash::CryptoHash; use near_primitives::transaction::{ diff --git a/tools/mock-node/src/lib.rs b/tools/mock-node/src/lib.rs index 3c31b2a76b2..b75106cec74 100644 --- a/tools/mock-node/src/lib.rs +++ b/tools/mock-node/src/lib.rs @@ -5,10 +5,11 @@ use actix::{Actor, Context, Handler, Recipient}; use anyhow::{anyhow, Context as AnyhowContext}; use near_chain::{Block, BlockHeader, Chain, ChainStoreAccess, Error}; use near_chain_configs::GenesisConfig; +use near_client::adapter::NetworkClientMessages; use near_client::sync; use near_network::types::{ - FullPeerInfo, NetworkClientMessages, NetworkInfo, NetworkRequests, NetworkResponses, - PeerManagerMessageRequest, PeerManagerMessageResponse, SetChainInfo, + FullPeerInfo, NetworkInfo, NetworkRequests, NetworkResponses, PeerManagerMessageRequest, + PeerManagerMessageResponse, SetChainInfo, }; use near_network::types::{ PartialEdgeInfo, PartialEncodedChunkRequestMsg, PartialEncodedChunkResponseMsg, PeerInfo, diff --git a/tools/mock-node/src/setup.rs b/tools/mock-node/src/setup.rs index 26124192fc4..ddceb207a20 100644 --- 
a/tools/mock-node/src/setup.rs +++ b/tools/mock-node/src/setup.rs @@ -8,9 +8,10 @@ use near_chain::{ Chain, ChainGenesis, ChainStore, ChainStoreAccess, DoomslugThresholdMode, RuntimeAdapter, }; use near_chain_configs::GenesisConfig; +use near_client::adapter::NetworkClientMessages; use near_client::{start_client, start_view_client, ClientActor, ViewClientActor}; use near_epoch_manager::{EpochManager, EpochManagerAdapter}; -use near_network::types::{NetworkClientMessages, NetworkRecipient}; +use near_network::types::NetworkRecipient; use near_o11y::WithSpanContext; use near_primitives::state_part::PartId; use near_primitives::syncing::get_num_state_parts; @@ -310,10 +311,10 @@ mod tests { use futures::{future, FutureExt}; use near_actix_test_utils::{run_actix, spawn_interruptible}; use near_chain_configs::Genesis; + use near_client::adapter::NetworkClientMessages; use near_client::GetBlock; use near_crypto::{InMemorySigner, KeyType}; use near_network::test_utils::{open_port, WaitOrTimeoutActor}; - use near_network::types::NetworkClientMessages; use near_o11y::testonly::init_integration_logger; use near_o11y::WithSpanContextExt; use near_primitives::hash::CryptoHash; diff --git a/tools/state-viewer/src/apply_chunk.rs b/tools/state-viewer/src/apply_chunk.rs index 510100eb1ce..53160c076a0 100644 --- a/tools/state-viewer/src/apply_chunk.rs +++ b/tools/state-viewer/src/apply_chunk.rs @@ -407,10 +407,10 @@ pub(crate) fn apply_receipt( mod test { use near_chain::{ChainGenesis, ChainStore, ChainStoreAccess, Provenance}; use near_chain_configs::Genesis; + use near_client::adapter::NetworkClientResponses; use near_client::test_utils::TestEnv; use near_crypto::{InMemorySigner, KeyType}; use near_epoch_manager::EpochManagerAdapter; - use near_network::types::NetworkClientResponses; use near_primitives::hash::CryptoHash; use near_primitives::runtime::config_store::RuntimeConfigStore; use near_primitives::shard_layout; From 1d283bfe4a592ee8a3ed7c14868e9ae35df066ba Mon Sep 17 
00:00:00 2001 From: Akhilesh Singhania Date: Tue, 25 Oct 2022 14:24:38 +0200 Subject: [PATCH 022/103] doc: Minor grammar fixes (#7922) Fixes some minor grammar issues from https://github.com/near/nearcore/pull/7918. --- docs/architecture/gas_params/README.md | 10 +++++----- docs/architecture/gas_params/estimator.md | 2 +- docs/architecture/gas_params/gas_profile.md | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/architecture/gas_params/README.md b/docs/architecture/gas_params/README.md index 2085e72cdfa..d6715a9ecd9 100644 --- a/docs/architecture/gas_params/README.md +++ b/docs/architecture/gas_params/README.md @@ -15,11 +15,11 @@ The [Gas Profile](./gas_profile.md) section goes into more details on how gas costs of a transaction are tracked in nearcore. The [runtime parameter estimator](./estimator.md) is a separate binary within -the nearcore repository. It contains benchmarking-like code that is used to -validate existing parameters values. Or when new features are added, new code -has to be added there to estimate the safe values of new parameters. That -section is for you if you want to add new features, such as a new pre-compiled -method or other host functions. +the nearcore repository. It contains benchmarking-like code used to validate +existing parameters values. When implementing new features, code should be added +there to estimate the safe values of the new parameters. This section is for you +if you are adding new features such as a new pre-compiled method or other host +functions. diff --git a/docs/architecture/gas_params/estimator.md b/docs/architecture/gas_params/estimator.md index d3dd853e193..d7db9aaef13 100644 --- a/docs/architecture/gas_params/estimator.md +++ b/docs/architecture/gas_params/estimator.md @@ -7,7 +7,7 @@ all users collude to make the system as slow as possible. This benchmarking suite is used check that the gas parameters defined in the protocol are correct. 
Correct in this context means, a chunk filled with 1 Pgas -will only take 1 second to be applied. Or more generally, per 1 Tgas of +will take at most 1 second to be applied. Or more generally, per 1 Tgas of execution, we spend no more than 1ms wall-clock time. For now, nearcore timing is the only one that matters. Things will become more diff --git a/docs/architecture/gas_params/gas_profile.md b/docs/architecture/gas_params/gas_profile.md index d0fdebedd1b..b85fb5df2b1 100644 --- a/docs/architecture/gas_params/gas_profile.md +++ b/docs/architecture/gas_params/gas_profile.md @@ -1,11 +1,11 @@ # Gas Profile -The transaction runtime charges gas in various places around the code. But they -all end up summaries inside an `ActionResult`. More specifically, the counters -`gas_burnt` and `gas_used` and the `profile` field that keeps track of what the -gas has been spent on. +The transaction runtime charges gas in various places around the code. The +charges end up as summaries inside an `ActionResult`. More specifically, the +`gas_burnt` and `gas_used` counters track the total gas required and the +`profile` field keeps track of what the gas was spent on. -## Charing Gas +## Charging Gas Generally speaking, gas is charged right before the computation that it pays for is executed. It has to be before to avoid cheap resource exhaustion attacks. 
Imagine the user has only 1 gas unit left but we start executing an expensive From 1e621cef9ac8e981a1106c2edcb960191d53c741 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Tue, 25 Oct 2022 14:33:38 +0100 Subject: [PATCH 023/103] core: remove unused to_base58 function (#7920) --- core/primitives-core/src/serialize.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/core/primitives-core/src/serialize.rs b/core/primitives-core/src/serialize.rs index 8c513509c33..233eaf1abd5 100644 --- a/core/primitives-core/src/serialize.rs +++ b/core/primitives-core/src/serialize.rs @@ -1,7 +1,3 @@ -pub fn to_base58>(input: T) -> String { - bs58::encode(input).into_string() -} - pub fn to_base64>(input: T) -> String { base64::encode(&input) } From 21a82cfb22f6f319af8a23f7dfe033e8e02a42d5 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Tue, 25 Oct 2022 15:05:29 +0100 Subject: [PATCH 024/103] chain: remove unnecessary `mut` from self reference (#7924) --- chain/chain/src/chain.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chain/chain/src/chain.rs b/chain/chain/src/chain.rs index df404b62b51..b40646ea249 100644 --- a/chain/chain/src/chain.rs +++ b/chain/chain/src/chain.rs @@ -3391,7 +3391,7 @@ impl Chain { } pub fn check_blocks_final_and_canonical( - &mut self, + &self, block_headers: &[&BlockHeader], ) -> Result<(), Error> { let last_final_block_hash = *self.head_header()?.last_final_block(); From 4777e77ab06efc13b298788810a7cbeb12c2021d Mon Sep 17 00:00:00 2001 From: posvyatokum Date: Tue, 25 Oct 2022 16:46:39 +0100 Subject: [PATCH 025/103] store: Update cold storage with one column (Block) #7744 (#7745) --- Cargo.lock | 1 + core/store/src/cold_storage.rs | 303 ++++++++++++++++++ core/store/src/columns.rs | 104 ++++++ core/store/src/lib.rs | 2 + integration-tests/Cargo.toml | 9 +- .../src/tests/client/cold_storage.rs | 117 +++++++ integration-tests/src/tests/client/mod.rs | 2 + 7 files changed, 537 insertions(+), 1 deletion(-) create mode 
100644 core/store/src/cold_storage.rs create mode 100644 integration-tests/src/tests/client/cold_storage.rs diff --git a/Cargo.lock b/Cargo.lock index 934aa709e85..ad8490f898a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2347,6 +2347,7 @@ dependencies = [ "serde", "serde_json", "smart-default", + "strum", "tempfile", "testlib", "tokio", diff --git a/core/store/src/cold_storage.rs b/core/store/src/cold_storage.rs new file mode 100644 index 00000000000..23e311946cb --- /dev/null +++ b/core/store/src/cold_storage.rs @@ -0,0 +1,303 @@ +use crate::columns::DBKeyType; +use crate::refcount::add_positive_refcount; +use crate::{DBCol, DBTransaction, Database, Store}; + +use borsh::BorshDeserialize; +use near_primitives::types::BlockHeight; +use std::collections::HashMap; +use std::io; +use strum::IntoEnumIterator; + +type StoreKey = Vec; +type StoreValue = Option>; +type StoreCache = HashMap<(DBCol, StoreKey), StoreValue>; + +struct StoreWithCache<'a> { + store: &'a Store, + cache: StoreCache, +} + +/// Updates provided cold database from provided hot store with information about block at `height`. +/// Wraps hot store in `StoreWithCache` for optimizing reads. +/// +/// First, we read from hot store information necessary +/// to determine all the keys that need to be updated in cold db. +/// Then we write updates to cold db column by column. +/// +/// This approach is used, because a key for db often combines several parts, +/// and many of those parts are reused across several cold columns (block hash, shard id, chunk hash, tx hash, ...). +/// Rather than manually combining those parts in the right order for every cold column, +/// we define `DBCol::key_type` to determine how a key for the column is formed, +/// `get_keys_from_store` to determine all possible keys only for needed key parts, +/// and `combine_keys` to generate all possible whole keys for the column based on order of those parts. +/// +/// To add a new column to cold storage, we need to +/// 1.
add it to `DBCol::is_cold` list +/// 2. define `DBCol::key_type` for it (if it isn't already defined) +/// 3. add new clause in `get_keys_from_store` for new key types used for this column (if there are any) +pub fn update_cold_db( + cold_db: &dyn Database, + hot_store: &Store, + height: &BlockHeight, +) -> io::Result<()> { + let _span = tracing::debug_span!(target: "store", "update cold db", height = height); + + let mut store_with_cache = StoreWithCache { store: hot_store, cache: StoreCache::new() }; + + let key_type_to_keys = get_keys_from_store(&mut store_with_cache, height)?; + for col in DBCol::iter() { + if col.is_cold() { + copy_from_store( + cold_db, + &mut store_with_cache, + col, + combine_keys(&key_type_to_keys, &col.key_type()), + )?; + } + } + + Ok(()) +} + +/// Gets values for given keys in a column from provided hot_store. +/// Creates a transaction based on those values with set `DBOp`s. +/// Writes that transaction to cold_db. +fn copy_from_store( + cold_db: &dyn Database, + hot_store: &mut StoreWithCache, + col: DBCol, + keys: Vec, +) -> io::Result<()> { + let _span = tracing::debug_span!(target: "store", "create and write transaction to cold db", col = %col); + + let mut transaction = DBTransaction::new(); + for key in keys { + // TODO: Look into using RocksDB’s multi_key function. It + // might speed things up. Currently our Database abstraction + // doesn’t offer interface for it so that would need to be + // added. + let data = hot_store.get(col, &key)?; + if let Some(value) = data { + // Database checks col.is_rc() on read and write + // And in every way expects rc columns to be written with rc + // + // TODO: As an optimisation, we might consider breaking the + // abstraction layer. Since we’re always writing to cold database, + // rather than using `cold_db: &dyn Database` argument we could have + // `cold_db: &ColdDB` and then some custom function which lets us + // write raw bytes.
+ if col.is_rc() { + transaction.update_refcount( + col, + key, + add_positive_refcount(&value, std::num::NonZeroU32::new(1).unwrap()), + ); + } else { + transaction.set(col, key, value); + } + } + } + cold_db.write(transaction)?; + return Ok(()); +} + +pub fn test_cold_genesis_update(cold_db: &dyn Database, hot_store: &Store) -> io::Result<()> { + let mut store_with_cache = StoreWithCache { store: hot_store, cache: StoreCache::new() }; + for col in DBCol::iter() { + if col.is_cold() { + copy_from_store( + cold_db, + &mut store_with_cache, + col, + hot_store.iter(col).map(|x| x.unwrap().0.to_vec()).collect(), + )?; + } + } + Ok(()) +} + +/// Returns HashMap from DBKeyType to possible keys of that type for provided height. +/// Only constructs keys for key types that are used in cold columns. +/// The goal is to capture all changes to db made during production of the block at provided height. +/// So, for every KeyType we need to capture all the keys that are related to that block. +/// For BlockHash it is just one key -- block hash of that height. +/// But for TransactionHash, for example, it is all of the tx hashes in that block. +fn get_keys_from_store( + store: &mut StoreWithCache, + height: &BlockHeight, +) -> io::Result>> { + let mut key_type_to_keys = HashMap::new(); + + let height_key = height.to_le_bytes(); + let block_hash_key = store.get_or_err(DBCol::BlockHeight, &height_key)?.as_slice().to_vec(); + + for key_type in DBKeyType::iter() { + key_type_to_keys.insert( + key_type, + match key_type { + DBKeyType::BlockHash => vec![block_hash_key.clone()], + _ => { + vec![] + } + }, + ); + } + + Ok(key_type_to_keys) +} + +/// Returns all possible keys for a column with key represented by a specific sequence of key types. +/// `key_type_to_value` -- result of `get_keys_from_store`, mapping from KeyType to all possible keys of that type. +/// `key_types` -- description of a final key, what sequence of key types forms a key, result of `DBCol::key_type`. 
+/// Basically, returns all possible combinations of keys from `key_type_to_value` for given order of key types. +pub fn combine_keys( + key_type_to_value: &HashMap>, + key_types: &[DBKeyType], +) -> Vec { + combine_keys_with_stop(key_type_to_value, key_types, key_types.len()) +} + +/// Recursive method to create every combination of keys values for given order of key types. +/// stop: usize -- what length of key_types to consider. +/// first generates all the key combination for first stop - 1 key types +/// then adds every key value for the last key type to every key value generated by previous call. +fn combine_keys_with_stop( + key_type_to_keys: &HashMap>, + keys_order: &[DBKeyType], + stop: usize, +) -> Vec { + // if no key types are provided, return one empty key value + if stop == 0 { + return vec![StoreKey::new()]; + } + let last_kt = &keys_order[stop - 1]; + // if one of the key types has no keys, no need to calculate anything, the result is empty + if key_type_to_keys[last_kt].is_empty() { + return vec![]; + } + let all_smaller_keys = combine_keys_with_stop(key_type_to_keys, keys_order, stop - 1); + let mut result_keys = vec![]; + for prefix_key in &all_smaller_keys { + for suffix_key in &key_type_to_keys[last_kt] { + let mut new_key = prefix_key.clone(); + new_key.extend(suffix_key); + result_keys.push(new_key); + } + } + result_keys +} + +fn option_to_not_found(res: io::Result>, field_name: F) -> io::Result +where + F: std::string::ToString, +{ + match res { + Ok(Some(o)) => Ok(o), + Ok(None) => Err(io::Error::new(io::ErrorKind::NotFound, field_name.to_string())), + Err(e) => Err(e), + } +} + +#[allow(dead_code)] +impl StoreWithCache<'_> { + pub fn get(&mut self, column: DBCol, key: &[u8]) -> io::Result { + if !self.cache.contains_key(&(column, key.to_vec())) { + self.cache.insert( + (column.clone(), key.to_vec()), + self.store.get(column, key)?.map(|x| x.as_slice().to_vec()), + ); + } + Ok(self.cache[&(column, key.to_vec())].clone()) + } + + pub fn 
get_ser( + &mut self, + column: DBCol, + key: &[u8], + ) -> io::Result> { + match self.get(column, key)? { + Some(bytes) => Ok(Some(T::try_from_slice(&bytes)?)), + None => Ok(None), + } + } + + pub fn get_or_err(&mut self, column: DBCol, key: &[u8]) -> io::Result> { + option_to_not_found(self.get(column, key), format_args!("{:?}: {:?}", column, key)) + } + + pub fn get_ser_or_err( + &mut self, + column: DBCol, + key: &[u8], + ) -> io::Result { + option_to_not_found(self.get_ser(column, key), format_args!("{:?}: {:?}", column, key)) + } +} + +#[cfg(test)] +mod test { + use super::{combine_keys, StoreKey}; + use crate::columns::DBKeyType; + use std::collections::{HashMap, HashSet}; + + #[test] + fn test_combine_keys() { + // What DBKeyType s are used here does not matter + let key_type_to_keys = HashMap::from([ + (DBKeyType::BlockHash, vec![vec![1, 2, 3], vec![2, 3]]), + (DBKeyType::BlockHeight, vec![vec![0, 1], vec![3, 4, 5]]), + (DBKeyType::ShardId, vec![]), + ]); + + assert_eq!( + HashSet::::from_iter(combine_keys( + &key_type_to_keys, + &vec![DBKeyType::BlockHash, DBKeyType::BlockHeight] + )), + HashSet::::from_iter(vec![ + vec![1, 2, 3, 0, 1], + vec![1, 2, 3, 3, 4, 5], + vec![2, 3, 0, 1], + vec![2, 3, 3, 4, 5] + ]) + ); + + assert_eq!( + HashSet::::from_iter(combine_keys( + &key_type_to_keys, + &vec![DBKeyType::BlockHeight, DBKeyType::BlockHash, DBKeyType::BlockHeight] + )), + HashSet::::from_iter(vec![ + vec![0, 1, 1, 2, 3, 0, 1], + vec![0, 1, 1, 2, 3, 3, 4, 5], + vec![0, 1, 2, 3, 0, 1], + vec![0, 1, 2, 3, 3, 4, 5], + vec![3, 4, 5, 1, 2, 3, 0, 1], + vec![3, 4, 5, 1, 2, 3, 3, 4, 5], + vec![3, 4, 5, 2, 3, 0, 1], + vec![3, 4, 5, 2, 3, 3, 4, 5] + ]) + ); + + assert_eq!( + HashSet::::from_iter(combine_keys( + &key_type_to_keys, + &vec![DBKeyType::ShardId, DBKeyType::BlockHeight] + )), + HashSet::::from_iter(vec![]) + ); + + assert_eq!( + HashSet::::from_iter(combine_keys( + &key_type_to_keys, + &vec![DBKeyType::BlockHash, DBKeyType::ShardId] + )), + 
HashSet::::from_iter(vec![]) + ); + + assert_eq!( + HashSet::::from_iter(combine_keys(&key_type_to_keys, &vec![])), + HashSet::::from_iter(vec![vec![]]) + ); + } +} diff --git a/core/store/src/columns.rs b/core/store/src/columns.rs index e6f830ea1c7..a9f6188acf2 100644 --- a/core/store/src/columns.rs +++ b/core/store/src/columns.rs @@ -258,6 +258,39 @@ pub enum DBCol { FlatStateMisc, } +/// Defines different logical parts of a db key. +/// To access a column you can use a concatenation of several key types. +/// This is needed to define DBCol::key_type. +/// Update this enum and DBCol::key_type accordingly when creating a new column. +/// Currently only used in cold storage continuous migration. +#[derive(PartialEq, Copy, Clone, Debug, Hash, Eq, strum::EnumIter)] +pub enum DBKeyType { + /// Empty row name. Used in DBCol::LastComponentNonce. + Empty, + /// Set of predetermined strings. Used, for example, in DBCol::BlockMisc + StringLiteral, + BlockHash, + /// Hash of the previous block. Logically different from BlockHash. Used for DBCol::NextBlockHashes. + PreviousBlockHash, + BlockHeight, + BlockOrdinal, + ShardId, + ShardUId, + ChunkHash, + EpochId, + Nonce, + PeerId, + AccountId, + TrieNodeOrValueHash, + TrieKey, + ReceiptHash, + TransactionHash, + OutcomeId, + ContractCacheKey, + PartId, + ColumnId, +} + impl DBCol { /// Whether data in this column is effectively immutable. /// @@ -331,6 +364,77 @@ impl DBCol { _ => false, } } + + /// Whether this column should be copied to the cold storage. + pub const fn is_cold(&self) -> bool { + match self { + DBCol::Block => true, + _ => false, + } + } + + /// Sequence of `DBKeyType`s whose concatenation forms the key for the column.
+ pub fn key_type(&self) -> &'static [DBKeyType] { + match self { + DBCol::DbVersion => &[DBKeyType::StringLiteral], + DBCol::BlockMisc => &[DBKeyType::StringLiteral], + DBCol::Block => &[DBKeyType::BlockHash], + DBCol::BlockHeader => &[DBKeyType::BlockHash], + DBCol::BlockHeight => &[DBKeyType::BlockHeight], + DBCol::State => &[DBKeyType::ShardUId, DBKeyType::TrieNodeOrValueHash], + DBCol::ChunkExtra => &[DBKeyType::BlockHash, DBKeyType::ShardUId], + DBCol::_TransactionResult => &[DBKeyType::OutcomeId], + DBCol::OutgoingReceipts => &[DBKeyType::BlockHash, DBKeyType::ShardId], + DBCol::IncomingReceipts => &[DBKeyType::BlockHash, DBKeyType::ShardId], + DBCol::Peers => &[DBKeyType::PeerId], + DBCol::EpochInfo => &[DBKeyType::EpochId], + DBCol::BlockInfo => &[DBKeyType::BlockHash], + DBCol::Chunks => &[DBKeyType::ChunkHash], + DBCol::PartialChunks => &[DBKeyType::ChunkHash], + DBCol::BlocksToCatchup => &[DBKeyType::BlockHash], + DBCol::StateDlInfos => &[DBKeyType::BlockHash], + DBCol::ChallengedBlocks => &[DBKeyType::BlockHash], + DBCol::StateHeaders => &[DBKeyType::ShardId, DBKeyType::BlockHash], + DBCol::InvalidChunks => &[DBKeyType::ChunkHash], + DBCol::BlockExtra => &[DBKeyType::BlockHash], + DBCol::BlockPerHeight => &[DBKeyType::BlockHeight], + DBCol::StateParts => &[DBKeyType::BlockHash, DBKeyType::ShardId, DBKeyType::PartId], + DBCol::EpochStart => &[DBKeyType::EpochId], + DBCol::AccountAnnouncements => &[DBKeyType::AccountId], + DBCol::NextBlockHashes => &[DBKeyType::PreviousBlockHash], + DBCol::EpochLightClientBlocks => &[DBKeyType::EpochId], + DBCol::ReceiptIdToShardId => &[DBKeyType::ReceiptHash], + DBCol::_NextBlockWithNewChunk => &[DBKeyType::BlockHash, DBKeyType::ShardId], + DBCol::_LastBlockWithNewChunk => &[DBKeyType::ShardId], + DBCol::PeerComponent => &[DBKeyType::PeerId], + DBCol::ComponentEdges => &[DBKeyType::Nonce], + DBCol::LastComponentNonce => &[DBKeyType::Empty], + DBCol::Transactions => &[DBKeyType::TransactionHash], + 
DBCol::_ChunkPerHeightShard => &[DBKeyType::BlockHeight, DBKeyType::ShardId], + DBCol::StateChanges => &[DBKeyType::BlockHash, DBKeyType::TrieKey], + DBCol::BlockRefCount => &[DBKeyType::BlockHash], + DBCol::TrieChanges => &[DBKeyType::BlockHash, DBKeyType::ShardUId], + DBCol::BlockMerkleTree => &[DBKeyType::BlockHash], + DBCol::ChunkHashesByHeight => &[DBKeyType::BlockHeight], + DBCol::BlockOrdinal => &[DBKeyType::BlockOrdinal], + DBCol::_GCCount => &[DBKeyType::ColumnId], + DBCol::OutcomeIds => &[DBKeyType::BlockHash, DBKeyType::ShardId], + DBCol::_TransactionRefCount => &[DBKeyType::TransactionHash], + DBCol::ProcessedBlockHeights => &[DBKeyType::BlockHeight], + DBCol::Receipts => &[DBKeyType::ReceiptHash], + DBCol::CachedContractCode => &[DBKeyType::ContractCacheKey], + DBCol::EpochValidatorInfo => &[DBKeyType::EpochId], + DBCol::HeaderHashesByHeight => &[DBKeyType::BlockHeight], + DBCol::StateChangesForSplitStates => &[DBKeyType::BlockHash, DBKeyType::ShardId], + DBCol::TransactionResultForBlock => &[DBKeyType::OutcomeId, DBKeyType::BlockHash], + #[cfg(feature = "protocol_feature_flat_state")] + DBCol::FlatState => &[DBKeyType::TrieKey], + #[cfg(feature = "protocol_feature_flat_state")] + DBCol::FlatStateDeltas => &[DBKeyType::ShardId, DBKeyType::BlockHash], + #[cfg(feature = "protocol_feature_flat_state")] + DBCol::FlatStateMisc => &[DBKeyType::ShardId], + } + } } impl fmt::Display for DBCol { diff --git a/core/store/src/lib.rs b/core/store/src/lib.rs index ebf242c723f..19cb17450c9 100644 --- a/core/store/src/lib.rs +++ b/core/store/src/lib.rs @@ -37,6 +37,8 @@ pub use crate::trie::{ }; pub use flat_state::FlatStateDelta; +#[cfg(feature = "cold_store")] +pub mod cold_storage; mod columns; pub mod config; pub mod db; diff --git a/integration-tests/Cargo.toml b/integration-tests/Cargo.toml index 39f87d10002..cfac063a895 100644 --- a/integration-tests/Cargo.toml +++ b/integration-tests/Cargo.toml @@ -23,6 +23,7 @@ rand.workspace = true serde.workspace = true 
serde_json.workspace = true smart-default.workspace = true +strum.workspace = true tempfile.workspace = true tokio.workspace = true tracing.workspace = true @@ -63,7 +64,10 @@ performance_stats = [ "near-network/performance_stats", ] expensive_tests = [] -test_features = ["nearcore/test_features"] +test_features = [ + "nearcore/test_features", + "near-store/test_features", +] protocol_feature_fix_contract_loading_cost = [ "nearcore/protocol_feature_fix_contract_loading_cost", ] @@ -85,3 +89,6 @@ sandbox = [ "near-client/sandbox", ] no_cache = ["nearcore/no_cache"] +cold_store = [ + "near-store/cold_store" +] diff --git a/integration-tests/src/tests/client/cold_storage.rs b/integration-tests/src/tests/client/cold_storage.rs new file mode 100644 index 00000000000..e6309bdaacf --- /dev/null +++ b/integration-tests/src/tests/client/cold_storage.rs @@ -0,0 +1,117 @@ +use crate::tests::client::process_blocks::create_nightshade_runtimes; +use near_chain::{ChainGenesis, Provenance}; +use near_chain_configs::Genesis; +use near_client::test_utils::TestEnv; +use near_crypto::{InMemorySigner, KeyType}; +use near_o11y::testonly::init_test_logger; +use near_primitives::transaction::{ + Action, DeployContractAction, FunctionCallAction, SignedTransaction, +}; +use near_store::cold_storage::{test_cold_genesis_update, update_cold_db}; +use near_store::db::TestDB; +use near_store::{DBCol, NodeStorage, Store, Temperature}; +use nearcore::config::GenesisExt; +use strum::IntoEnumIterator; + +fn check_key(first_store: &Store, second_store: &Store, col: DBCol, key: &[u8]) { + tracing::debug!("Checking {:?} {:?}", col, key); + + let first_res = first_store.get(col, key); + let second_res = second_store.get(col, key); + + assert_eq!(first_res.unwrap(), second_res.unwrap()); +} + +fn check_iter(first_store: &Store, second_store: &Store, col: DBCol) { + for (key, _) in first_store.iter(col).map(Result::unwrap) { + check_key(first_store, second_store, col, &key); + } +} + +/// Deploying test 
contract and calling write_random_value 5 times every block for 4 epochs. +/// Also doing 5 send transactions every block. +/// 4 epochs, because this test does not cover gc behaviour. +/// After every block updating a separate database with data from client's storage. +/// After 4 epochs we check that everything, that exists in cold columns +/// of the storage of the client also exists in the database to which we were writing. +#[test] +fn test_storage_after_commit_of_cold_update() { + init_test_logger(); + + let cold_db = TestDB::new(); + + let epoch_length = 5; + let max_height = epoch_length * 4; + + let mut genesis = Genesis::test(vec!["test0".parse().unwrap(), "test1".parse().unwrap()], 1); + + genesis.config.epoch_length = epoch_length; + let mut chain_genesis = ChainGenesis::test(); + chain_genesis.epoch_length = epoch_length; + let mut env = TestEnv::builder(chain_genesis) + .runtime_adapters(create_nightshade_runtimes(&genesis, 1)) + .build(); + + let mut last_hash = *env.clients[0].chain.genesis().hash(); + + test_cold_genesis_update(&*cold_db, &env.clients[0].runtime_adapter.store()).unwrap(); + + for h in 1..max_height { + let signer = InMemorySigner::from_seed("test0".parse().unwrap(), KeyType::ED25519, "test0"); + if h == 1 { + let tx = SignedTransaction::from_actions( + h, + "test0".parse().unwrap(), + "test0".parse().unwrap(), + &signer, + vec![Action::DeployContract(DeployContractAction { + code: near_test_contracts::rs_contract().to_vec(), + })], + last_hash, + ); + env.clients[0].process_tx(tx, false, false); + } + for i in 0..5 { + let tx = SignedTransaction::from_actions( + h * 10 + i, + "test0".parse().unwrap(), + "test0".parse().unwrap(), + &signer, + vec![Action::FunctionCall(FunctionCallAction { + method_name: "write_random_value".to_string(), + args: vec![], + gas: 100_000_000_000_000, + deposit: 0, + })], + last_hash, + ); + env.clients[0].process_tx(tx, false, false); + } + for i in 0..5 { + let tx = SignedTransaction::send_money( + h * 
10 + i, + "test0".parse().unwrap(), + "test0".parse().unwrap(), + &signer, + 1, + last_hash, + ); + env.clients[0].process_tx(tx, false, false); + } + + let block = env.clients[0].produce_block(h).unwrap().unwrap(); + env.process_block(0, block.clone(), Provenance::PRODUCED); + + last_hash = block.hash().clone(); + + update_cold_db(&*cold_db, &env.clients[0].runtime_adapter.store(), &h).unwrap(); + } + + let cold_store = NodeStorage::new(cold_db).get_store(Temperature::Hot); + + for col in DBCol::iter() { + if col.is_cold() { + check_iter(&env.clients[0].runtime_adapter.store(), &cold_store, col); + } + } +} diff --git a/integration-tests/src/tests/client/mod.rs b/integration-tests/src/tests/client/mod.rs index 48ea89dccdf..746ae82deaf 100644 --- a/integration-tests/src/tests/client/mod.rs +++ b/integration-tests/src/tests/client/mod.rs @@ -1,6 +1,8 @@ mod benchmarks; mod challenges; mod chunks_management; +#[cfg(feature = "cold_store")] +mod cold_storage; mod features; mod process_blocks; mod runtimes; From fc07881d981991950f7829ed72552c92442bdd38 Mon Sep 17 00:00:00 2001 From: mzhangmzz <34969888+mzhangmzz@users.noreply.github.com> Date: Tue, 25 Oct 2022 14:45:55 -0400 Subject: [PATCH 026/103] [refactor] Refactor client and client actor to move the code for block processing to client (#7898) This PR is a pure refactoring. The context is that any processing details should be put in Client instead of ClientActor. ClientActor should just serve as a coordinator class to handle messages and check triggers and immediately pass it to Client. This is better for testing since we can't write unit test for any logic in ClientActor and also better for code readability as the logic is not scattered in two classes. This PR only moves the part around block processing. 
The rest is tracked by https://github.com/near/nearcore/issues/7899 --- chain/chain/src/test_utils.rs | 4 + chain/client/src/client.rs | 199 +++++++++++++++++- chain/client/src/client_actor.rs | 195 +++-------------- chain/client/src/tests/mod.rs | 1 + chain/client/src/tests/process_blocks.rs | 39 ++++ .../src/tests/client/process_blocks.rs | 21 -- 6 files changed, 266 insertions(+), 193 deletions(-) create mode 100644 chain/client/src/tests/process_blocks.rs diff --git a/chain/chain/src/test_utils.rs b/chain/chain/src/test_utils.rs index 7f93c94b4bd..75f0cee53ab 100644 --- a/chain/chain/src/test_utils.rs +++ b/chain/chain/src/test_utils.rs @@ -73,6 +73,10 @@ pub fn wait_for_all_blocks_in_processing(chain: &mut Chain) -> bool { chain.blocks_in_processing.wait_for_all_blocks() } +pub fn is_block_in_processing(chain: &Chain, block_hash: &CryptoHash) -> bool { + chain.blocks_in_processing.contains(block_hash) +} + pub fn wait_for_block_in_processing( chain: &mut Chain, hash: &CryptoHash, diff --git a/chain/client/src/client.rs b/chain/client/src/client.rs index e54a8a2fccd..39f278803df 100644 --- a/chain/client/src/client.rs +++ b/chain/client/src/client.rs @@ -27,7 +27,7 @@ use near_chain::{ }; use near_chain_configs::ClientConfig; use near_chunks::ShardsManager; -use near_network::types::{FullPeerInfo, NetworkRequests, PeerManagerAdapter}; +use near_network::types::{FullPeerInfo, NetworkRequests, PeerManagerAdapter, ReasonForBan}; use near_primitives::block::{Approval, ApprovalInner, ApprovalMessage, Block, BlockHeader, Tip}; use near_primitives::challenge::{Challenge, ChallengeBody}; use near_primitives::hash::CryptoHash; @@ -53,6 +53,7 @@ use near_network::types::{AccountKeys, ChainInfo, PeerManagerMessageRequest, Set use near_o11y::{log_assert, WithSpanContextExt}; use near_primitives::block_header::ApprovalType; use near_primitives::epoch_manager::RngSeed; +use near_primitives::network::PeerId; use near_primitives::version::PROTOCOL_VERSION; use 
near_primitives::views::CatchupStatusView; @@ -64,6 +65,8 @@ pub const EPOCH_SYNC_REQUEST_TIMEOUT: Duration = Duration::from_millis(1_000); /// How frequently a Epoch Sync response can be sent to a particular peer // TODO #3488 set 60_000 pub const EPOCH_SYNC_PEER_TIMEOUT: Duration = Duration::from_millis(10); +/// Drop blocks whose height are beyond head + horizon if it is not in the current epoch. +const BLOCK_HORIZON: u64 = 500; /// number of blocks at the epoch start for which we will log more detailed info pub const EPOCH_START_INFO_BLOCKS: u64 = 500; @@ -806,6 +809,165 @@ impl Client { } } + /// Processes received block. Ban peer if the block header is invalid or the block is ill-formed. + // This function is just a wrapper for receive_block_impl that makes error propagation easier. + pub fn receive_block( + &mut self, + block: Block, + peer_id: PeerId, + was_requested: bool, + apply_chunks_done_callback: DoneApplyChunkCallback, + ) { + let hash = *block.hash(); + let prev_hash = *block.header().prev_hash(); + let _span = tracing::debug_span!( + target: "client", + "receive_block", + me = ?self.validator_signer.as_ref().map(|vs| vs.validator_id()), + %prev_hash, + %hash, + height = block.header().height(), + %peer_id, + was_requested) + .entered(); + + let res = self.receive_block_impl( + block, + peer_id.clone(), + was_requested, + apply_chunks_done_callback, + ); + // Log the errors here. Note that the real error handling logic is already + // done within receive_block_impl, this is just for logging. + if let Err(err) = res { + if err.is_bad_data() { + warn!(target: "client", "Receive bad block: {}", err); + } else if err.is_error() { + if let near_chain::Error::DBNotFoundErr(msg) = &err { + debug_assert!(!msg.starts_with("BLOCK HEIGHT"), "{:?}", err); + } + if self.sync_status.is_syncing() { + // While syncing, we may receive blocks that are older or from next epochs. + // This leads to Old Block or EpochOutOfBounds errors. 
+ debug!(target: "client", "Error on receival of block: {}", err); + } else { + error!(target: "client", "Error on receival of block: {}", err); + } + } else { + debug!(target: "client", error = %err, "Process block: refused by chain"); + } + } + } + + /// Processes received block. + /// This function first does some pre-check based on block height to avoid processing + /// blocks multiple times. + /// Then it processes the block header. If the header is valid, it broadcasts the block to its peers. + /// Then it starts the block processing process to process the full block. + pub(crate) fn receive_block_impl( + &mut self, + block: Block, + peer_id: PeerId, + was_requested: bool, + apply_chunks_done_callback: DoneApplyChunkCallback, + ) -> Result<(), near_chain::Error> { + // To protect ourselves from spamming, we do some pre-check on block height before we do any + // real processing. + if !self.check_block_height(&block, was_requested)? { + return Ok(()); + } + let prev_hash = *block.header().prev_hash(); + let block = block.into(); + self.verify_and_rebroadcast_block(&block, was_requested, &peer_id)?; + let provenance = + if was_requested { near_chain::Provenance::SYNC } else { near_chain::Provenance::NONE }; + let res = self.start_process_block(block, provenance, apply_chunks_done_callback); + match &res { + Err(near_chain::Error::Orphan) => { + if !self.chain.is_orphan(&prev_hash) { + self.request_block(prev_hash, peer_id) + } + } + _ => {} + } + res + } + + /// To protect ourselves from spamming, we do some pre-check on block height before we do any + /// processing. This function returns true if the block height is valid. 
+ fn check_block_height( + &self, + block: &Block, + was_requested: bool, + ) -> Result { + let head = self.chain.head()?; + let is_syncing = self.sync_status.is_syncing(); + if block.header().height() >= head.height + BLOCK_HORIZON && is_syncing && !was_requested { + debug!(target: "client", head_height = head.height, "Dropping a block that is too far ahead."); + return Ok(false); + } + let tail = self.chain.tail()?; + if block.header().height() < tail { + debug!(target: "client", tail_height = tail, "Dropping a block that is too far behind."); + return Ok(false); + } + // drop the block if a) it is not requested, b) we already processed this height, + // c) it is not building on top of current head + if !was_requested + && block.header().prev_hash() + != &self + .chain + .head() + .map_or_else(|_| CryptoHash::default(), |tip| tip.last_block_hash) + { + if self.chain.is_height_processed(block.header().height())? { + debug!(target: "client", height = block.header().height(), "Dropping a block because we've seen this height before and we didn't request it"); + return Ok(false); + } + } + Ok(true) + } + + /// Verify the block and rebroadcast it if it is valid, ban the peer if it's invalid. + /// Ignore all other errors because the full block will be processed later. + /// Note that this happens before the full block processing logic because we want blocks to be + /// propagated in the network fast. + fn verify_and_rebroadcast_block( + &mut self, + block: &MaybeValidated, + was_requested: bool, + peer_id: &PeerId, + ) -> Result<(), near_chain::Error> { + let res = self.chain.process_block_header(block.header(), &mut vec![]); + let res = res.and_then(|_| self.chain.validate_block(block)); + match res { + Ok(_) => { + let head = self.chain.head()?; + // do not broadcast blocks that are too far back. 
+ if (head.height < block.header().height() + || &head.epoch_id == block.header().epoch_id()) + && !was_requested + && !self.sync_status.is_syncing() + { + self.rebroadcast_block(block.as_ref().into_inner()); + } + Ok(()) + } + Err(e) if e.is_bad_data() => { + self.ban_peer(peer_id.clone(), ReasonForBan::BadBlockHeader); + Err(e) + } + Err(_) => { + // We are ignoring all other errors and proceeding with the + // block. If it is an orphan (i.e. we haven’t processed its + // previous block) than we will get MissingBlock errors. In + // those cases we shouldn’t reject the block instead passing + // it along. Eventually, it’ll get saved as an orphan. + Ok(()) + } + } + } + /// Start the processing of a block. Note that this function will return before /// the full processing is finished because applying chunks is done asynchronously /// in the rayon thread pool. @@ -931,7 +1093,7 @@ impl Client { self.request_missing_chunks(blocks_missing_chunks, orphans_missing_chunks); } - pub fn rebroadcast_block(&mut self, block: &Block) { + fn rebroadcast_block(&mut self, block: &Block) { if self.rebroadcasted_blocks.get(block.hash()).is_none() { self.network_adapter.do_send( PeerManagerMessageRequest::NetworkRequests(NetworkRequests::Block { @@ -1867,6 +2029,39 @@ impl Client { } } +/* implements functions used to communicate with network */ +impl Client { + pub fn request_block(&self, hash: CryptoHash, peer_id: PeerId) { + match self.chain.block_exists(&hash) { + Ok(false) => { + self.network_adapter.do_send( + PeerManagerMessageRequest::NetworkRequests(NetworkRequests::BlockRequest { + hash, + peer_id, + }) + .with_span_context(), + ); + } + Ok(true) => { + debug!(target: "client", "send_block_request_to_peer: block {} already known", hash) + } + Err(e) => { + error!(target: "client", "send_block_request_to_peer: failed to check block exists: {:?}", e) + } + } + } + + pub fn ban_peer(&self, peer_id: PeerId, ban_reason: ReasonForBan) { + self.network_adapter.do_send( + 
PeerManagerMessageRequest::NetworkRequests(NetworkRequests::BanPeer { + peer_id, + ban_reason, + }) + .with_span_context(), + ); + } +} + impl Client { /// Each epoch defines a set of important accounts: block producers, chunk producers, /// approvers. Low-latency reliable communication between those accounts is critical, diff --git a/chain/client/src/client_actor.rs b/chain/client/src/client_actor.rs index ce9d35df40d..07bbfd7db01 100644 --- a/chain/client/src/client_actor.rs +++ b/chain/client/src/client_actor.rs @@ -1,4 +1,9 @@ //! Client actor orchestrates Client and facilitates network connection. +//! It should just serve as a coordinator class to handle messages and check triggers but immediately +//! pass the control to Client. This means, any real block processing or production logic should +//! be put in Client. +//! Unfortunately, this is not the case today. We are in the process of refactoring ClientActor +//! https://github.com/near/nearcore/issues/7899 use crate::adapter::{NetworkClientMessages, NetworkClientResponses}; use crate::client::{Client, EPOCH_START_INFO_BLOCKS}; @@ -64,8 +69,6 @@ use tracing::{debug, error, info, trace, warn}; /// Multiplier on `max_block_time` to wait until deciding that chain stalled. const STATUS_WAIT_TIME_MULTIPLIER: u64 = 10; -/// Drop blocks whose height are beyond head + horizon if it is not in the current epoch. 
-const BLOCK_HORIZON: u64 = 500; /// `max_block_production_time` times this multiplier is how long we wait before rebroadcasting /// the current `head` const HEAD_STALL_MULTIPLIER: u32 = 4; @@ -415,7 +418,12 @@ impl ClientActor { return NetworkClientResponses::NoResponse; } } - self.receive_block(block, peer_id, was_requested); + self.client.receive_block( + block, + peer_id.clone(), + was_requested, + self.get_apply_chunks_done_callback(), + ); NetworkClientResponses::NoResponse } else { match self @@ -1231,10 +1239,20 @@ impl ClientActor { fn produce_block(&mut self, next_height: BlockHeight) -> Result<(), Error> { let _span = tracing::debug_span!(target: "client", "produce_block", next_height).entered(); if let Some(block) = self.client.produce_block(next_height)? { - let peer_id = self.node_id.clone(); + // If we produced the block, send it out before we apply the block. + self.network_adapter.do_send( + PeerManagerMessageRequest::NetworkRequests(NetworkRequests::Block { + block: block.clone(), + }) + .with_span_context(), + ); // We’ve produced the block so that counts as validated block. let block = MaybeValidated::from_validated(block); - let res = self.process_block(block, Provenance::PRODUCED, &peer_id); + let res = self.client.start_process_block( + block, + Provenance::PRODUCED, + self.get_apply_chunks_done_callback(), + ); if let Err(e) = &res { match e { near_chain::Error::ChunksMissing(_) => { @@ -1310,73 +1328,6 @@ impl ClientActor { } } - /// Process block and execute callbacks. - fn process_block( - &mut self, - block: MaybeValidated, - provenance: Provenance, - peer_id: &PeerId, - ) -> Result<(), near_chain::Error> { - let _span = tracing::debug_span!( - target: "client", - "process_block", - height = block.header().height()) - .entered(); - debug!(target: "client", ?provenance, ?peer_id); - // If we produced the block, send it out before we apply the block. 
- // If we didn't produce the block and didn't request it, do basic validation - // before sending it out. - if provenance == Provenance::PRODUCED { - self.network_adapter.do_send( - PeerManagerMessageRequest::NetworkRequests(NetworkRequests::Block { - block: block.as_ref().into_inner().clone(), - }) - .with_span_context(), - ); - // If we produced it, we don’t need to validate it. Mark the block - // as valid. - block.mark_as_valid(); - } else { - let chain = &mut self.client.chain; - // TODO: refactor this after we make apply_chunks async. After that, process_block - // will return before the full block is finished processing, and we can simply move the - // rebroadcast_block logic to after self.client.process_block - let res = chain.process_block_header(block.header(), &mut vec![]); - let res = res.and_then(|_| chain.validate_block(&block)); - match res { - Ok(_) => { - let head = self.client.chain.head()?; - // do not broadcast blocks that are too far back. - if (head.height < block.header().height() - || &head.epoch_id == block.header().epoch_id()) - && provenance == Provenance::NONE - && !self.client.sync_status.is_syncing() - { - self.client.rebroadcast_block(block.as_ref().into_inner()); - } - } - Err(e) if e.is_bad_data() => { - self.network_adapter.do_send( - PeerManagerMessageRequest::NetworkRequests(NetworkRequests::BanPeer { - peer_id: peer_id.clone(), - ban_reason: ReasonForBan::BadBlockHeader, - }) - .with_span_context(), - ); - return Err(e); - } - Err(_) => { - // We are ignoring all other errors and proceeding with the - // block. If it is an orphan (i.e. we haven’t processed its - // previous block) than we will get MissingBlock errors. In - // those cases we shouldn’t reject the block instead passing - // it along. Eventually, it’ll get saved as an orphan. 
- } - } - } - self.client.start_process_block(block, provenance, self.get_apply_chunks_done_callback()) - } - /// Returns the callback function that will be passed to various functions that may trigger /// the processing of new blocks. This callback will be called at the end of applying chunks /// for every block. @@ -1387,88 +1338,12 @@ impl ClientActor { }) } - /// Processes received block. Ban peer if the block header is invalid or the block is ill-formed. - fn receive_block(&mut self, block: Block, peer_id: PeerId, was_requested: bool) { - let hash = *block.hash(); - let _span = tracing::debug_span!( - target: "client", - "receive_block", - me = ?self.client.validator_signer.as_ref().map(|vs| vs.validator_id()), - prev_hash = %block.header().prev_hash(), - %hash, - height = block.header().height(), - %peer_id, - was_requested) - .entered(); - let head = unwrap_or_return!(self.client.chain.head()); - let is_syncing = self.client.sync_status.is_syncing(); - if block.header().height() >= head.height + BLOCK_HORIZON && is_syncing && !was_requested { - debug!(target: "client", head_height = head.height, "Dropping a block that is too far ahead."); - return; - } - let tail = unwrap_or_return!(self.client.chain.tail()); - if block.header().height() < tail { - debug!(target: "client", tail_height = tail, "Dropping a block that is too far behind."); - return; - } - // drop the block if a) it is not requested, b) we already processed this height, c) it is not building on top of current head - // Note that this check must happen before process_block where we try to validate block - // header and rebroadcast blocks, otherwise blocks that failed processing could be - // processed and rebroadcasted again and again. 
- if !was_requested - && block.header().prev_hash() - != &self - .client - .chain - .head() - .map_or_else(|_| CryptoHash::default(), |tip| tip.last_block_hash) - { - if self.client.chain.is_height_processed(block.header().height()).unwrap_or_default() { - debug!(target: "client", height = block.header().height(), "Dropping a block because we've seen this height before and we didn't request it"); - return; - } - } - let prev_hash = *block.header().prev_hash(); - let provenance = - if was_requested { near_chain::Provenance::SYNC } else { near_chain::Provenance::NONE }; - match self.process_block(block.into(), provenance, &peer_id) { - Ok(_) => {} - Err(ref err) if err.is_bad_data() => { - warn!(target: "client", "Receive bad block: {}", err); - } - Err(ref err) if err.is_error() => { - if let near_chain::Error::DBNotFoundErr(msg) = err { - debug_assert!(!msg.starts_with("BLOCK HEIGHT"), "{:?}", err); - } - if self.client.sync_status.is_syncing() { - // While syncing, we may receive blocks that are older or from next epochs. - // This leads to Old Block or EpochOutOfBounds errors. 
- debug!(target: "client", "Error on receival of block: {}", err); - } else { - error!(target: "client", "Error on receival of block: {}", err); - } - } - Err(e) => match e { - near_chain::Error::Orphan => { - if !self.client.chain.is_orphan(&prev_hash) { - self.request_block(prev_hash, peer_id) - } - } - // missing chunks are already handled in self.client.process_block() - // we don't need to do anything here - near_chain::Error::ChunksMissing(_) => {} - _ => { - debug!(target: "client", error = %e, "Process block: refused by chain"); - } - }, - } - } - fn receive_headers(&mut self, headers: Vec, peer_id: PeerId) -> bool { info!(target: "client", "Received {} block headers from {}", headers.len(), peer_id); if headers.len() == 0 { return true; } + info!(target: "client", "Received block headers from height {} to {}", headers.first().unwrap().height(), headers.last().unwrap().height()); match self.client.sync_block_headers(headers) { Ok(_) => true, Err(err) => { @@ -1483,26 +1358,6 @@ impl ClientActor { } } - fn request_block(&mut self, hash: CryptoHash, peer_id: PeerId) { - match self.client.chain.block_exists(&hash) { - Ok(false) => { - self.network_adapter.do_send( - PeerManagerMessageRequest::NetworkRequests(NetworkRequests::BlockRequest { - hash, - peer_id, - }) - .with_span_context(), - ); - } - Ok(true) => { - debug!(target: "client", "send_block_request_to_peer: block {} already known", hash) - } - Err(e) => { - error!(target: "client", "send_block_request_to_peer: failed to check block exists: {:?}", e) - } - } - } - /// Check whether need to (continue) sync. /// Also return higher height with known peers at that height. 
fn syncing_info(&self) -> Result<(bool, u64), near_chain::Error> { @@ -1777,7 +1632,7 @@ impl ClientActor { for hash in vec![*header.prev_hash(), *header.hash()].into_iter() { - self.request_block(hash, id.clone()); + self.client.request_block(hash, id.clone()); } } } diff --git a/chain/client/src/tests/mod.rs b/chain/client/src/tests/mod.rs index 52390dba87f..61997e4373f 100644 --- a/chain/client/src/tests/mod.rs +++ b/chain/client/src/tests/mod.rs @@ -3,4 +3,5 @@ mod catching_up; mod chunks_management; mod consensus; mod cross_shard_tx; +mod process_blocks; mod query_client; diff --git a/chain/client/src/tests/process_blocks.rs b/chain/client/src/tests/process_blocks.rs new file mode 100644 index 00000000000..fdc26dc1f22 --- /dev/null +++ b/chain/client/src/tests/process_blocks.rs @@ -0,0 +1,39 @@ +use crate::test_utils::TestEnv; +use near_chain::{test_utils, ChainGenesis, Provenance}; +use near_crypto::{KeyType, PublicKey}; +use near_primitives::network::PeerId; +use near_primitives::types::validator_stake::ValidatorStake; +use near_primitives::validator_signer::InMemoryValidatorSigner; +use std::sync::Arc; + +/// Only process one block per height +/// Test that if a node receives two blocks at the same height, it doesn't process the second one +/// if the second block is not requested +#[test] +fn test_not_process_height_twice() { + let mut env = TestEnv::builder(ChainGenesis::test()).build(); + let block = env.clients[0].produce_block(1).unwrap().unwrap(); + // modify the block and resign it + let mut duplicate_block = block.clone(); + env.process_block(0, block, Provenance::PRODUCED); + let validator_signer = + InMemoryValidatorSigner::from_seed("test0".parse().unwrap(), KeyType::ED25519, "test0"); + let proposals = + vec![ValidatorStake::new("test1".parse().unwrap(), PublicKey::empty(KeyType::ED25519), 0)]; + duplicate_block.mut_header().get_mut().inner_rest.validator_proposals = proposals; + duplicate_block.mut_header().resign(&validator_signer); + let 
dup_block_hash = *duplicate_block.hash(); + // we should have dropped the block before we even tried to process it, so the result should be ok + env.clients[0] + .receive_block_impl( + duplicate_block, + PeerId::new(PublicKey::empty(KeyType::ED25519)), + false, + Arc::new(|_| {}), + ) + .unwrap(); + // check that the second block is not being processed + assert!(!test_utils::is_block_in_processing(&env.clients[0].chain, &dup_block_hash)); + // check that we didn't rebroadcast the second block + assert!(env.network_adapters[0].pop().is_none()); +} diff --git a/integration-tests/src/tests/client/process_blocks.rs b/integration-tests/src/tests/client/process_blocks.rs index db9c9acd1d3..4d281abfb95 100644 --- a/integration-tests/src/tests/client/process_blocks.rs +++ b/integration-tests/src/tests/client/process_blocks.rs @@ -2127,27 +2127,6 @@ fn test_sync_hash_validity() { } } -/// Only process one block per height -/// Temporarily disable this test because the is_height_processed check is moved to client actor -/// TODO (Min): refactor client actor receive_block code to move it to client -#[ignore] -#[test] -fn test_not_process_height_twice() { - let mut env = TestEnv::builder(ChainGenesis::test()).build(); - let block = env.clients[0].produce_block(1).unwrap().unwrap(); - let mut invalid_block = block.clone(); - env.process_block(0, block, Provenance::PRODUCED); - let validator_signer = - InMemoryValidatorSigner::from_seed("test0".parse().unwrap(), KeyType::ED25519, "test0"); - let proposals = - vec![ValidatorStake::new("test1".parse().unwrap(), PublicKey::empty(KeyType::ED25519), 0)]; - invalid_block.mut_header().get_mut().inner_rest.validator_proposals = proposals; - invalid_block.mut_header().resign(&validator_signer); - let accepted_blocks = - env.clients[0].process_block_test(invalid_block.into(), Provenance::NONE).unwrap(); - assert!(accepted_blocks.is_empty()); -} - #[test] fn test_block_height_processed_orphan() { let mut env = 
TestEnv::builder(ChainGenesis::test()).build(); From c45f615ee9697b2790af8cf83d5e79129813d99b Mon Sep 17 00:00:00 2001 From: robin-near <111538878+robin-near@users.noreply.github.com> Date: Tue, 25 Oct 2022 18:17:25 -0700 Subject: [PATCH 027/103] Fix proposals shuffling implementation (#7921) --- chain/epoch-manager/src/proposals.rs | 48 +++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/chain/epoch-manager/src/proposals.rs b/chain/epoch-manager/src/proposals.rs index 682934ad49b..50139b7ea9b 100644 --- a/chain/epoch-manager/src/proposals.rs +++ b/chain/epoch-manager/src/proposals.rs @@ -86,8 +86,7 @@ mod old_validator_selection { AccountId, Balance, NumSeats, ValidatorId, ValidatorKickoutReason, }; use near_primitives::version::ProtocolVersion; - use rand::seq::SliceRandom; - use rand::SeedableRng; + use rand::{RngCore, SeedableRng}; use rand_hc::Hc128Rng; use crate::proposals::find_threshold; @@ -270,24 +269,63 @@ mod old_validator_selection { fn shuffle_duplicate_proposals(dup_proposals: &mut Vec, rng_seed: RngSeed) { let mut rng: Hc128Rng = SeedableRng::from_seed(rng_seed); - dup_proposals.shuffle(&mut rng); + for i in (1..dup_proposals.len()).rev() { + dup_proposals.swap(i, gen_index_old(&mut rng, (i + 1) as u64) as usize); + } } + fn gen_index_old(rng: &mut Hc128Rng, bound: u64) -> u64 { + // This is a simplified copy of the rand gen_index implementation to ensure that + // upgrades to the rand library will not cause a change in the shuffling behavior. 
+ let zone = (bound << bound.leading_zeros()).wrapping_sub(1); + loop { + let v = rng.next_u64(); + let mul = (v as u128) * (bound as u128); + let (hi, lo) = ((mul >> 64) as u64, mul as u64); + if lo < zone { + return hi; + } + } + } #[cfg(test)] mod tests { use near_primitives::hash::CryptoHash; use crate::proposals::old_validator_selection::shuffle_duplicate_proposals; + #[test] + pub fn proposal_shuffling_sanity_checks() { + // Since we made our own impl for shuffling, do some sanity checks. + for i in 0..10 { + let mut dup_proposals = (0..i).collect::>(); + shuffle_duplicate_proposals( + &mut dup_proposals, + CryptoHash::hash_bytes(&[1, 2, 3, 4, 5]).as_bytes().clone(), + ); + assert_eq!(dup_proposals.len(), i as usize); + dup_proposals.sort(); + assert_eq!(dup_proposals, (0..i).collect::>()); + } + } + #[test] pub fn proposal_randomness_reproducibility() { // Sanity check that the proposal shuffling implementation does not change. - let mut dup_proposals = vec![0, 1, 2, 3, 4, 5, 6]; + let mut dup_proposals = (0..100).collect::>(); shuffle_duplicate_proposals( &mut dup_proposals, CryptoHash::hash_bytes(&[1, 2, 3, 4, 5]).as_bytes().clone(), ); - assert_eq!(dup_proposals, vec![3, 1, 0, 4, 5, 6, 2]); + assert_eq!( + dup_proposals, + vec![ + 28, 64, 35, 39, 5, 19, 91, 93, 32, 55, 49, 86, 7, 34, 58, 48, 65, 11, 0, 3, 63, + 85, 96, 12, 23, 76, 29, 69, 31, 45, 1, 15, 33, 61, 38, 74, 87, 10, 62, 9, 40, + 56, 98, 8, 52, 75, 99, 13, 57, 44, 6, 79, 89, 84, 68, 36, 94, 53, 80, 70, 42, + 88, 73, 2, 72, 25, 20, 67, 37, 97, 41, 71, 47, 59, 24, 66, 54, 21, 18, 26, 60, + 92, 50, 77, 81, 14, 43, 17, 90, 95, 78, 16, 30, 46, 22, 83, 27, 4, 51, 82 + ] + ); } } } From 78836a9dc1bf0df85e25c64f0734616dff4ba14e Mon Sep 17 00:00:00 2001 From: robin-near <111538878+robin-near@users.noreply.github.com> Date: Tue, 25 Oct 2022 19:21:30 -0700 Subject: [PATCH 028/103] [Debug UI] Improve last-blocks debug page (#7902) * New last-blocks debug page. 
* Use JSX with babel * Minor fix * Minor fix 2 * Rename is_block_missing --- chain/client-primitives/src/debug.rs | 24 +- chain/client/src/debug.rs | 188 ++++++---- chain/jsonrpc-primitives/src/types/status.rs | 4 +- chain/jsonrpc/res/last_blocks.html | 275 ++++++--------- chain/jsonrpc/res/last_blocks.js | 346 +++++++++++++++++++ chain/jsonrpc/src/lib.rs | 38 +- 6 files changed, 617 insertions(+), 258 deletions(-) create mode 100644 chain/jsonrpc/res/last_blocks.js diff --git a/chain/client-primitives/src/debug.rs b/chain/client-primitives/src/debug.rs index bbe0e8e8f51..13cda11270e 100644 --- a/chain/client-primitives/src/debug.rs +++ b/chain/client-primitives/src/debug.rs @@ -47,17 +47,33 @@ pub struct DebugChunkStatus { #[derive(Serialize, Deserialize, Debug)] pub struct DebugBlockStatus { pub block_hash: CryptoHash, + pub prev_block_hash: CryptoHash, pub block_height: u64, + pub block_timestamp: u64, pub block_producer: Option, + pub full_block_missing: bool, // only header available + pub is_on_canonical_chain: bool, pub chunks: Vec, // Time that was spent processing a given block. #[serde(skip_serializing_if = "Option::is_none")] pub processing_time_ms: Option, - // Time between this block and the next one in chain. - pub timestamp_delta: u64, pub gas_price_ratio: f64, } +#[derive(Serialize, Deserialize, Debug)] +pub struct MissedHeightInfo { + pub block_height: u64, + pub block_producer: Option, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct DebugBlockStatusData { + pub blocks: Vec, + pub missed_heights: Vec, + pub head: CryptoHash, + pub header_head: CryptoHash, +} + // Information about the approval created by this node. // Used for debug purposes only. #[derive(Serialize, Debug, Clone)] @@ -157,7 +173,7 @@ pub enum DebugStatus { // Detailed information about last couple epochs. EpochInfo, // Detailed information about last couple blocks. - BlockStatus, + BlockStatus(Option), // Consensus related information. 
ValidatorStatus, // Request for the current catchup status @@ -176,7 +192,7 @@ pub enum DebugStatusResponse { // List of epochs - in descending order (next epoch is first). EpochInfo(Vec), // Detailed information about blocks. - BlockStatus(Vec), + BlockStatus(DebugBlockStatusData), // Detailed information about the validator (approvals, block & chunk production etc.) ValidatorStatus(ValidatorStatus), } diff --git a/chain/client/src/debug.rs b/chain/client/src/debug.rs index 65b28c7d487..9f35aa3283f 100644 --- a/chain/client/src/debug.rs +++ b/chain/client/src/debug.rs @@ -6,8 +6,8 @@ use borsh::BorshSerialize; use near_chain::crypto_hash_timer::CryptoHashTimer; use near_chain::{near_chain_primitives, ChainStoreAccess, RuntimeAdapter}; use near_client_primitives::debug::{ - ApprovalAtHeightStatus, BlockProduction, ChunkCollection, DebugStatus, DebugStatusResponse, - ProductionAtHeight, ValidatorStatus, + ApprovalAtHeightStatus, BlockProduction, ChunkCollection, DebugBlockStatusData, DebugStatus, + DebugStatusResponse, MissedHeightInfo, ProductionAtHeight, ValidatorStatus, }; use near_client_primitives::types::Error; use near_client_primitives::{ @@ -163,8 +163,8 @@ impl Handler> for ClientActor { DebugStatus::EpochInfo => { Ok(DebugStatusResponse::EpochInfo(self.get_recent_epoch_info()?)) } - DebugStatus::BlockStatus => { - Ok(DebugStatusResponse::BlockStatus(self.get_last_blocks_info()?)) + DebugStatus::BlockStatus(height) => { + Ok(DebugStatusResponse::BlockStatus(self.get_last_blocks_info(height)?)) } DebugStatus::ValidatorStatus => { Ok(DebugStatusResponse::ValidatorStatus(self.get_validator_status()?)) @@ -382,87 +382,127 @@ impl ClientActor { fn get_last_blocks_info( &mut self, - ) -> Result, near_chain_primitives::Error> { + starting_height: Option, + ) -> Result { let head = self.client.chain.head()?; + let header_head = self.client.chain.header_head()?; - let mut blocks_debug: Vec = Vec::new(); - let mut last_block_hash = head.last_block_hash; - let mut 
last_block_timestamp: u64 = 0; - let mut last_block_height = head.height + 1; - + let mut blocks: HashMap = HashMap::new(); + let mut missed_heights: Vec = Vec::new(); + let mut last_epoch_id = head.epoch_id.clone(); let initial_gas_price = self.client.chain.genesis_block().header().gas_price(); - // Fetch last 50 blocks (we can fetch more blocks in the future if needed) - for _ in 0..DEBUG_BLOCKS_TO_FETCH { - let block = match self.client.chain.get_block(&last_block_hash) { - Ok(block) => block, - Err(_) => break, + let mut height_to_fetch = starting_height.unwrap_or(header_head.height); + let min_height_to_fetch = + max(height_to_fetch as i64 - DEBUG_BLOCKS_TO_FETCH as i64, 0) as u64; + let mut block_hashes_to_force_fetch = HashSet::new(); + while height_to_fetch > min_height_to_fetch || !block_hashes_to_force_fetch.is_empty() { + let block_hashes = if height_to_fetch > min_height_to_fetch { + let block_hashes: Vec = self + .client + .chain + .store() + .get_all_header_hashes_by_height(height_to_fetch)? + .into_iter() + .collect(); + if block_hashes.is_empty() { + missed_heights.push(MissedHeightInfo { + block_height: height_to_fetch, + block_producer: self + .client + .runtime_adapter + .get_block_producer(&last_epoch_id, height_to_fetch) + .ok(), + }); + } + height_to_fetch -= 1; + block_hashes + } else { + let block_hashes = block_hashes_to_force_fetch.iter().cloned().collect(); + block_hashes_to_force_fetch.clear(); + block_hashes }; - // If there is a gap - and some blocks were not produced - make sure to report this - // (and mention who was supposed to be a block producer). 
- for height in (block.header().height() + 1..last_block_height).rev() { + for block_hash in block_hashes { + if blocks.contains_key(&block_hash) { + continue; + } + let block_header = self.client.chain.get_block_header(&block_hash)?; + let block = self.client.chain.get_block(&block_hash).ok(); + let is_on_canonical_chain = + match self.client.chain.get_block_by_height(block_header.height()) { + Ok(block) => block.hash() == &block_hash, + Err(_) => false, + }; + let block_producer = self .client .runtime_adapter - .get_block_producer(block.header().epoch_id(), height) + .get_block_producer(block_header.epoch_id(), block_header.height()) .ok(); - blocks_debug.push(DebugBlockStatus { - block_hash: CryptoHash::default(), - block_height: height, - block_producer, - chunks: vec![], - processing_time_ms: None, - timestamp_delta: 0, - gas_price_ratio: 1.0, - }); - } - - let block_producer = self - .client - .runtime_adapter - .get_block_producer(block.header().epoch_id(), block.header().height()) - .ok(); - - let chunks = block - .chunks() - .iter() - .map(|chunk| DebugChunkStatus { - shard_id: chunk.shard_id(), - chunk_hash: chunk.chunk_hash(), - chunk_producer: self - .client - .runtime_adapter - .get_chunk_producer( - block.header().epoch_id(), - block.header().height(), - chunk.shard_id(), - ) - .ok(), - gas_used: chunk.gas_used(), - processing_time_ms: CryptoHashTimer::get_timer_value(chunk.chunk_hash().0) - .map(|s| s.as_millis() as u64), - }) - .collect(); - blocks_debug.push(DebugBlockStatus { - block_hash: last_block_hash, - block_height: block.header().height(), - block_producer: block_producer, - chunks, - processing_time_ms: CryptoHashTimer::get_timer_value(last_block_hash) - .map(|s| s.as_millis() as u64), - timestamp_delta: if last_block_timestamp > 0 { - last_block_timestamp.saturating_sub(block.header().raw_timestamp()) - } else { - 0 - }, - gas_price_ratio: block.header().gas_price() as f64 / initial_gas_price as f64, - }); - last_block_hash = 
block.header().prev_hash().clone(); - last_block_timestamp = block.header().raw_timestamp(); - last_block_height = block.header().height(); + let chunks = match &block { + Some(block) => block + .chunks() + .iter() + .map(|chunk| DebugChunkStatus { + shard_id: chunk.shard_id(), + chunk_hash: chunk.chunk_hash(), + chunk_producer: self + .client + .runtime_adapter + .get_chunk_producer( + block_header.epoch_id(), + block_header.height(), + chunk.shard_id(), + ) + .ok(), + gas_used: chunk.gas_used(), + processing_time_ms: CryptoHashTimer::get_timer_value( + chunk.chunk_hash().0, + ) + .map(|s| s.as_millis() as u64), + }) + .collect(), + None => vec![], + }; + + blocks.insert( + block_hash, + DebugBlockStatus { + block_hash, + prev_block_hash: *block_header.prev_hash(), + block_height: block_header.height(), + block_producer, + full_block_missing: block.is_none(), + is_on_canonical_chain, + chunks, + processing_time_ms: CryptoHashTimer::get_timer_value(block_hash) + .map(|s| s.as_millis() as u64), + block_timestamp: block_header.raw_timestamp(), + gas_price_ratio: block_header.gas_price() as f64 / initial_gas_price as f64, + }, + ); + // TODO(robin): using last epoch id when iterating in reverse height direction is + // not a good idea for calculating producer of missing heights. Revisit this. + last_epoch_id = block_header.epoch_id().clone(); + if let Some(prev_height) = block_header.prev_height() { + if block_header.height() != prev_height + 1 { + // This block was produced using a Skip approval; make sure to fetch the + // previous block even if it's very far back so we can better understand + // the skip. + // TODO(robin): A better heuristic can be used to determine how far back + // to fetch additional blocks. 
+ block_hashes_to_force_fetch.insert(*block_header.prev_hash()); + } + } + } } - Ok(blocks_debug) + Ok(DebugBlockStatusData { + head: head.last_block_hash, + header_head: header_head.last_block_hash, + missed_heights, + blocks: blocks.into_values().collect(), + }) } /// Returns debugging information about the validator - including things like which approvals were received, which blocks/chunks will be diff --git a/chain/jsonrpc-primitives/src/types/status.rs b/chain/jsonrpc-primitives/src/types/status.rs index 7e335ecb98f..d7c59c97c94 100644 --- a/chain/jsonrpc-primitives/src/types/status.rs +++ b/chain/jsonrpc-primitives/src/types/status.rs @@ -1,5 +1,5 @@ use near_client_primitives::debug::{ - DebugBlockStatus, EpochInfoView, TrackedShardsView, ValidatorStatus, + DebugBlockStatusData, EpochInfoView, TrackedShardsView, ValidatorStatus, }; use near_primitives::views::{CatchupStatusView, PeerStoreView, SyncStatusView}; use serde::{Deserialize, Serialize}; @@ -18,7 +18,7 @@ pub enum DebugStatusResponse { // List of epochs - in descending order (next epoch is first). EpochInfo(Vec), // Detailed information about blocks. - BlockStatus(Vec), + BlockStatus(DebugBlockStatusData), // Detailed information about the validator (approvals, block & chunk production etc.) ValidatorStatus(ValidatorStatus), PeerStore(PeerStoreView), diff --git a/chain/jsonrpc/res/last_blocks.html b/chain/jsonrpc/res/last_blocks.html index 5bca3196e85..1629d0900bd 100644 --- a/chain/jsonrpc/res/last_blocks.html +++ b/chain/jsonrpc/res/last_blocks.html @@ -2,9 +2,24 @@ - + + + + + + + -

- Most recent blocks. -

- Skipped chunks have grey background.
- Skipped blocks have hashes set to 11111.
-

-

- Red text means that we don't know this producer (it is not present in our announce account list).
-
-
- - - - - -
-
+
+ + \ No newline at end of file diff --git a/chain/jsonrpc/res/last_blocks.js b/chain/jsonrpc/res/last_blocks.js new file mode 100644 index 00000000000..4bce651068b --- /dev/null +++ b/chain/jsonrpc/res/last_blocks.js @@ -0,0 +1,346 @@ +function ellipsify(str, maxLen) { + if (str.length > maxLen) { + return str.substring(0, maxLen - 3) + '...'; + } + return str; +} + +// Makes an element that when clicked, expands or ellipsifies the hash and creator. +function HashElement({ hashValue, creator, expandAll, knownProducers }) { + let [expanded, setExpanded] = React.useState(false); + let updateXarrow = reactXarrow.useXarrow(); + return { + setExpanded((value) => !value); + // xarrows need to be updated whenever graph dot positions may change. + updateXarrow(); + }}> + {expanded || expandAll + ? `${hashValue} ${creator}` + : `${ellipsify(hashValue, 8)} ${ellipsify(creator, 13)}`} + ; +} + +// Sorts the API response into easily displayable rows, and computes the graph layout. +// +// Inputs: +// blocks: array of DebugBlockStatus +// missedHeights: array of MissedHeightInfo +// head: block hash of the chain's head +// headerHead: block hash of the chain's header head +// Output: array of elements where each element is either { +// block: DebugBlockStatus, +// parentIndex: number?, // the index of the parent block, or null if parent not included in the data +// graphColumn: number, // the column to display the graph node in +// blockDelay: number?, // number of seconds since parent's block timestamp, or null if parent not included in the data +// chunkSkipped: boolean[], // for each chunk, whether the chunk is the same as that chunk of parent block +// isHead: boolean, +// isHeaderHead: boolean, +// } or { missedHeight: MissedHeightInfo } +function sortBlocksAndDetermineBlockGraphLayout(blocks, missedHeights, head, headerHead) { + const rows = []; + for (let block of blocks) { + rows.push({ + block, + parentIndex: null, + graphColumn: -1, + blockDelay: null, + 
chunkSkipped: block.chunks.map(() => false), + isHead: head == block.block_hash, + isHeaderHead: headerHead == block.block_hash, + }); + } + for (let missedHeight of missedHeights) { + rows.push({ missedHeight }); + } + + function sortingKey(row) { + if ('block' in row) { + // some lousy tie-breaking for same-height rows. + return row.block.block_height + (row.block.block_timestamp / 1e12 % 1); + } else { + return row.missedHeight.block_height; + } + } + + rows.sort((a, b) => sortingKey(b) - sortingKey(a)); + + const rowIndexByHash = new Map(); + rows.forEach((row, rowIndex) => { + if ('block' in row) { + rowIndexByHash.set(row.block.block_hash, rowIndex); + } + }); + + let highestNodeOnFirstColumn = rows.length; + for (let i = rows.length - 1; i >= 0; i--) { + let row = rows[i]; + if ('missedHeight' in row) { + continue; + } + const block = row.block; + + // Look up parent index, and also compute things that depend on the parent block. + if (rowIndexByHash.has(block.prev_block_hash)) { + row.parentIndex = rowIndexByHash.get(block.prev_block_hash); + const parent = rows[row.parentIndex]; + row.blockDelay = (block.block_timestamp - parent.block.block_timestamp) / 1e9; + for (let j = 0; + j < Math.min(block.chunks.length, parent.block.chunks.length); + j++) { + row.chunkSkipped[j] = + block.chunks[j].chunk_hash == parent.block.chunks[j].chunk_hash; + } + } + // We'll use a two-column layout for the block graph. We traverse from bottom + // up (oldest block to latest), and for each row we pick the first column unless + // that would make us draw a line (from the parent to this node) through another + // node; in which case we would pick the second column. To do that we just need + // to keep track of the highest node we've seen so far for the first column. + // + // Not the best layout for a graph, but it's sufficient since we rarely have forks. 
+ let column = 0; + if (row.parentIndex != null && + rows[row.parentIndex].graphColumn == 0 && + row.parentIndex > highestNodeOnFirstColumn) { + column = 1; + } else { + highestNodeOnFirstColumn = i; + } + row.graphColumn = column; + } + return rows; +} + +function BlocksTable({ rows, knownProducers, expandAll, hideMissingHeights }) { + let numGraphColumns = 1; // either 1 or 2; determines the width of leftmost td + let numShards = 0; + for (let row of rows) { + if ('block' in row) { + numGraphColumns = Math.max(numGraphColumns, row.graphColumn + 1); + for (let chunk of row.block.chunks) { + numShards = Math.max(numShards, chunk.shard_id + 1); + } + } + } + const header = + Chain + Height + {'Hash & creator'} + Processing Time (ms) + Block Delay (s) + Gas price ratio + {[...Array(numShards).keys()].map(i => + Shard {i} (hash/gas(Tgas)/time(ms)))} + ; + + // One xarrow element per arrow (from block to block). + const graphArrows = []; + + // One 'tr' element per row. + const tableRows = []; + for (let i = 0; i < rows.length; i++) { + const row = rows[i]; + if ('missedHeight' in row) { + if (!hideMissingHeights) { + tableRows.push( + + {row.missedHeight.block_height} + {row.missedHeight.block_producer} missed block + ); + } + continue; + } + let block = row.block; + + const chunkCells = []; + block.chunks.forEach((chunk, shardId) => { + chunkCells.push( + + + + {(chunk.gas_used / (1024 * 1024 * 1024 * 1024)).toFixed(1)} + {chunk.processing_time_ms} + ); + }); + + tableRows.push( + + +
+
+ + + {block.block_height} + {row.isHead &&
HEAD
} + {row.isHeaderHead &&
HEADER HEAD
} + + + + + {block.processing_time_ms} + {row.blockDelay ?? ''} + {block.gas_price_ratio} + {block.full_block_missing && header only} + {chunkCells} + ); + if (row.parentIndex != null) { + graphArrows.push(); + } + } + return
+ {graphArrows} + + + {header} + {tableRows} + +
+
+} + +function Page() { + const [rows, setRows] = React.useState([]); + const [error, setError] = React.useState(null); + const [knownProducers, setKnownProducers] = React.useState(new Set()); + const [expandAll, setExpandAll] = React.useState(false); + const [hideMissingHeights, setHideMissingHeights] = React.useState(false); + const updateXarrow = reactXarrow.useXarrow(); + let blockStatusApiPath = '/debug/api/block_status'; + const url = new URL(window.location.toString()); + let title = 'Most Recent Blocks'; + if (url.searchParams.has('height')) { + blockStatusApiPath += '/' + url.searchParams.get('height'); + title = 'Blocks from ' + url.searchParams.get('height'); + } + // useEffect with empty dependency list means to run this once at beginning. + React.useEffect(() => { + (async () => { + try { + let resp = await fetch('/debug/api/status'); + if (resp.status == 405) { + throw new Error('Debug not allowed - did you set enable_debug_rpc: true in your config?'); + } else if (!resp.ok) { + throw new Error('Debug API call failed: ' + resp.statusText); + } + const { detailed_debug_status: { network_info: { known_producers } } } = await resp.json(); + const knownProducerSet = new Set(); + for (const producer of known_producers) { + knownProducerSet.add(producer.account_id); + } + setKnownProducers(knownProducerSet); + + resp = await fetch(blockStatusApiPath); + if (!resp.ok) { + throw new Error('Could not fetch block debug status: ' + resp.statusText); + } + const { status_response: { BlockStatus: data } } = await resp.json(); + setRows(sortBlocksAndDetermineBlockGraphLayout( + data.blocks, + data.missed_heights, + data.head, + data.header_head)); + } catch (error) { + setError(error); + } + })(); + }, []); + + // Compute missing blocks and chunks statistics (whenever rows changes). 
+ const { numCanonicalBlocks, canonicalHeightCount, numChunksSkipped } = React.useMemo(() => { + let firstCanonicalHeight = 0; + let lastCanonicalHeight = 0; + let numCanonicalBlocks = 0; + const numChunksSkipped = []; + for (const row of rows) { + if (!('block' in row)) { + continue; + } + const block = row.block; + if (!block.is_on_canonical_chain) { + continue; + } + if (firstCanonicalHeight == 0) { + firstCanonicalHeight = block.block_height; + } + lastCanonicalHeight = block.block_height; + numCanonicalBlocks++; + for (let i = 0; i < row.chunkSkipped.length; i++) { + while (numChunksSkipped.length < i + 1) { + numChunksSkipped.push(0); + } + if (row.chunkSkipped[i]) { + numChunksSkipped[i]++; + } + } + } + return { + numCanonicalBlocks, + canonicalHeightCount: firstCanonicalHeight - lastCanonicalHeight + 1, + numChunksSkipped, + }; + }, [rows]); + + return +

{title}

+
Skipped chunks have grey background.
+
+ Red text means that we don't know this producer + (it's not present in our announce account list). +
+ {error &&
{error.stack}
} +
+ Missing blocks: {canonicalHeightCount - numCanonicalBlocks} { } + Produced: {numCanonicalBlocks} { } + Missing Rate: {((canonicalHeightCount - numCanonicalBlocks) / canonicalHeightCount * 100).toFixed(2)}% +
+
+ {numChunksSkipped.map((numSkipped, shardId) => +
+ Shard {shardId}: Missing chunks: {numSkipped} { } + Produced: {numCanonicalBlocks - numSkipped} { } + Missing Rate: {(numSkipped / numCanonicalBlocks * 100).toFixed(2)}% +
)} +
+ + + + +
; +} + +ReactDOM + .createRoot(document.getElementById('react-container')) + .render(); diff --git a/chain/jsonrpc/src/lib.rs b/chain/jsonrpc/src/lib.rs index 20f5261c547..1d99dba85f6 100644 --- a/chain/jsonrpc/src/lib.rs +++ b/chain/jsonrpc/src/lib.rs @@ -31,7 +31,7 @@ use near_jsonrpc_primitives::types::config::RpcProtocolConfigResponse; use near_o11y::metrics::{prometheus, Encoder, TextEncoder}; use near_primitives::hash::CryptoHash; use near_primitives::transaction::SignedTransaction; -use near_primitives::types::AccountId; +use near_primitives::types::{AccountId, BlockHeight}; use near_primitives::views::FinalExecutionOutcomeViewEnum; mod api; @@ -788,7 +788,7 @@ impl JsonRpcHandler { self.client_send(DebugStatus::EpochInfo).await?.rpc_into() } "/debug/api/block_status" => { - self.client_send(DebugStatus::BlockStatus).await?.rpc_into() + self.client_send(DebugStatus::BlockStatus(None)).await?.rpc_into() } "/debug/api/validator_status" => { self.client_send(DebugStatus::ValidatorStatus).await?.rpc_into() @@ -807,6 +807,24 @@ impl JsonRpcHandler { } } + pub async fn debug_block_status( + &self, + starting_height: Option, + ) -> Result< + Option, + near_jsonrpc_primitives::types::status::RpcStatusError, + > { + if self.enable_debug_rpc { + let debug_status = + self.client_send(DebugStatus::BlockStatus(starting_height)).await?.rpc_into(); + return Ok(Some(near_jsonrpc_primitives::types::status::RpcDebugStatusResponse { + status_response: debug_status, + })); + } else { + return Ok(None); + } + } + pub async fn protocol_config( &self, request_data: near_jsonrpc_primitives::types::config::RpcProtocolConfigRequest, @@ -1330,6 +1348,17 @@ async fn debug_handler( } } +async fn debug_block_status_handler( + path: web::Path, + handler: web::Data, +) -> Result { + match handler.debug_block_status(Some(*path)).await { + Ok(Some(value)) => Ok(HttpResponse::Ok().json(&value)), + Ok(None) => Ok(HttpResponse::MethodNotAllowed().finish()), + Err(_) => 
Ok(HttpResponse::ServiceUnavailable().finish()), + } +} + fn health_handler( handler: web::Data, ) -> impl Future> { @@ -1404,6 +1433,7 @@ async fn display_debug_html( let content = match page_name.as_str() { "last_blocks" => Some(debug_page_string!("last_blocks.html", handler)), + "last_blocks.js" => Some(debug_page_string!("last_blocks.js", handler)), "network_info" => Some(debug_page_string!("network_info.html", handler)), "epoch_info" => Some(debug_page_string!("epoch_info.html", handler)), "chain_n_chunk_info" => Some(debug_page_string!("chain_n_chunk_info.html", handler)), @@ -1479,6 +1509,10 @@ pub fn start_http( .service(web::resource("/network_info").route(web::get().to(network_info_handler))) .service(web::resource("/metrics").route(web::get().to(prometheus_handler))) .service(web::resource("/debug/api/{api}").route(web::get().to(debug_handler))) + .service( + web::resource("/debug/api/block_status/{starting_height}") + .route(web::get().to(debug_block_status_handler)), + ) .service(debug_html) .service(display_debug_html) }) From df22fc7892340c80a3faf560a72786f0a112669e Mon Sep 17 00:00:00 2001 From: Jakob Meier Date: Wed, 26 Oct 2022 12:29:17 +0100 Subject: [PATCH 029/103] doc: fix gas section links and other small fixes (#7931) - links for gas_param sections in summary didn't work - rename gas_param to just gas - pin link to commit - also mention that we spend gas on tx other than wasm --- docs/SUMMARY.md | 8 ++++---- docs/architecture/{gas_params => gas}/README.md | 4 ++-- docs/architecture/{gas_params => gas}/estimator.md | 0 docs/architecture/{gas_params => gas}/gas_profile.md | 2 +- .../{gas_params => gas}/parameter_definition.md | 0 5 files changed, 7 insertions(+), 7 deletions(-) rename docs/architecture/{gas_params => gas}/README.md (88%) rename docs/architecture/{gas_params => gas}/estimator.md (100%) rename docs/architecture/{gas_params => gas}/gas_profile.md (89%) rename docs/architecture/{gas_params => gas}/parameter_definition.md (100%) 
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index bcdf59435bf..f2b4d698905 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -12,10 +12,10 @@ - [Transaction Routing](./architecture/how/tx_routing.md) - [Trie](./architecture/trie.md) - [Network](./architecture/network.md) -- [Gas Cost Parameters](./architecture/gas_params/README.md) - - [Parameter Definitions](./parameter_definition.md) - - [Gas Profile](./gas_profile.md) - - [Runtime Parameter Estimator](./estimator.md) +- [Gas Cost Parameters](./architecture/gas/README.md) + - [Parameter Definitions](./architecture/gas/parameter_definition.md) + - [Gas Profile](./architecture/gas/gas_profile.md) + - [Runtime Parameter Estimator](./architecture/gas/estimator.md) # Practices diff --git a/docs/architecture/gas_params/README.md b/docs/architecture/gas/README.md similarity index 88% rename from docs/architecture/gas_params/README.md rename to docs/architecture/gas/README.md index d6715a9ecd9..74af103b4d2 100644 --- a/docs/architecture/gas_params/README.md +++ b/docs/architecture/gas/README.md @@ -1,7 +1,7 @@ # Gas Cost Parameters -NEAR charges gas when executing users' WASM code. The how and why is described -in other documents, such as [Gas basic +NEAR charges gas when executing users' WASM code and other transactions. The how +and why is described in other documents, such as [Gas basic concepts](https://docs.near.org/concepts/basics/transactions/gas), [Gas advanced concepts](https://docs.near.org/concepts/basics/transactions/gas-advanced), and [the runtime fee specification](https://nomicon.io/RuntimeSpec/Fees/). 
diff --git a/docs/architecture/gas_params/estimator.md b/docs/architecture/gas/estimator.md similarity index 100% rename from docs/architecture/gas_params/estimator.md rename to docs/architecture/gas/estimator.md diff --git a/docs/architecture/gas_params/gas_profile.md b/docs/architecture/gas/gas_profile.md similarity index 89% rename from docs/architecture/gas_params/gas_profile.md rename to docs/architecture/gas/gas_profile.md index b85fb5df2b1..b44d1585eb3 100644 --- a/docs/architecture/gas_params/gas_profile.md +++ b/docs/architecture/gas/gas_profile.md @@ -16,7 +16,7 @@ When charging gas for an action, the `ActionResult` can be updated directly. But when charging WASM costs, it would be too slow to do a context switch each time, Therefore, a fast gas counter exists that can be updated from within the VM. (See -[gas_counter.rs](https://github.com/near/nearcore/blob/master/runtime/near-vm-logic/src/gas_counter.rs)) +[gas_counter.rs](https://github.com/near/nearcore/blob/06711f8460f946b8d2042aa1df6abe03c5184767/runtime/near-vm-logic/src/gas_counter.rs)) At the end of a function call execution, the gas counter is read by the host and merged into the `ActionResult`. diff --git a/docs/architecture/gas_params/parameter_definition.md b/docs/architecture/gas/parameter_definition.md similarity index 100% rename from docs/architecture/gas_params/parameter_definition.md rename to docs/architecture/gas/parameter_definition.md From c58fb7dc4b4079653de7a4c576cb94d6a05b79fe Mon Sep 17 00:00:00 2001 From: pompon0 Date: Wed, 26 Oct 2022 14:02:37 +0200 Subject: [PATCH 030/103] moved PeerStore from PeerManagerActor to NetworkState. (#7890) Also removed some actix messages which are not needed any more. 
--- chain/network/src/config.rs | 62 ++- chain/network/src/config_json.rs | 3 - chain/network/src/peer/peer_actor.rs | 87 ++-- chain/network/src/peer/testonly.rs | 34 +- chain/network/src/peer/tests/communication.rs | 24 +- .../network/src/peer_manager/network_state.rs | 64 ++- .../src/peer_manager/peer_manager_actor.rs | 186 ++----- .../src/peer_manager/peer_store/mod.rs | 474 ++++++++++-------- .../src/peer_manager/peer_store/testonly.rs | 7 + .../src/peer_manager/peer_store/tests.rs | 164 +++--- chain/network/src/peer_manager/testonly.rs | 38 +- chain/network/src/peer_manager/tests.rs | 58 +++ chain/network/src/private_actix.rs | 4 +- chain/network/src/test_utils.rs | 2 +- chain/network/src/types.rs | 16 - .../src/tests/nearcore/node_cluster.rs | 2 +- .../src/tests/nearcore/stake_nodes.rs | 3 +- .../src/tests/nearcore/sync_nodes.rs | 8 +- .../src/tests/nearcore/sync_state_nodes.rs | 23 +- .../src/tests/network/peer_handshake.rs | 2 +- .../src/tests/network/routing.rs | 18 +- integration-tests/src/tests/network/runner.rs | 6 +- .../src/tests/network/stress_network.rs | 2 +- nearcore/res/example-config-gc.json | 4 - nearcore/res/example-config-no-gc.json | 4 - neard/src/cli.rs | 2 +- tools/chainsync-loadtest/src/main.rs | 2 +- 27 files changed, 684 insertions(+), 615 deletions(-) create mode 100644 chain/network/src/peer_manager/peer_store/testonly.rs diff --git a/chain/network/src/config.rs b/chain/network/src/config.rs index ce3fbea5532..0d40b04ad26 100644 --- a/chain/network/src/config.rs +++ b/chain/network/src/config.rs @@ -3,6 +3,7 @@ use crate::concurrency::demux; use crate::network_protocol::PeerAddr; use crate::network_protocol::PeerInfo; use crate::peer_manager::peer_manager_actor::Event; +use crate::peer_manager::peer_store; use crate::sink::Sink; use crate::time; use crate::types::ROUTED_MESSAGE_TTL; @@ -63,10 +64,9 @@ pub struct NetworkConfig { pub node_key: SecretKey, pub validator: Option, - pub boot_nodes: Vec, + pub peer_store: peer_store::Config, 
pub whitelist_nodes: Vec, pub handshake_timeout: time::Duration, - pub reconnect_delay: time::Duration, /// Maximum time between refreshing the peer list. pub monitor_peers_max_period: time::Duration, @@ -86,10 +86,6 @@ pub struct NetworkConfig { /// Lower bound of the number of connections to archival peers to keep /// if we are an archival node. pub archival_peer_connections_lower_bound: u32, - /// Duration of the ban for misbehaving peers. - pub ban_window: time::Duration, - /// Remove expired peers. - pub peer_expiration_duration: time::Duration, /// Maximum number of peer addresses we should ever send on PeersRequest. pub max_send_peers: u32, /// Duration for checking on stats from the peers. @@ -108,8 +104,6 @@ pub struct NetworkConfig { pub highest_peer_horizon: u64, /// Period between pushing network info to client pub push_info_period: time::Duration, - /// Nodes will not accept or try to establish connection to such peers. - pub blacklist: blacklist::Blacklist, /// Flag to disable outbound connections. When this flag is active, nodes will not try to /// establish connection with other nodes, but will accept incoming connection if other requirements /// are satisfied. @@ -123,8 +117,6 @@ pub struct NetworkConfig { pub accounts_data_broadcast_rate_limit: demux::RateLimit, /// features pub features: Features, - /// If true - connect only to the bootnodes. - pub connect_only_to_boot_nodes: bool, // Whether to ignore tombstones some time after startup. // @@ -170,14 +162,25 @@ impl NetworkConfig { "" => None, addr => Some(addr.parse().context("Failed to parse SocketAddr")?), }, - boot_nodes: if cfg.boot_nodes.is_empty() { - vec![] - } else { - cfg.boot_nodes - .split(',') - .map(|chunk| chunk.parse()) + peer_store: peer_store::Config { + boot_nodes: if cfg.boot_nodes.is_empty() { + vec![] + } else { + cfg.boot_nodes + .split(',') + .map(|chunk| chunk.parse()) + .collect::>() + .context("boot_nodes")? 
+ }, + blacklist: cfg + .blacklist + .iter() + .map(|e| e.parse()) .collect::>() - .context("boot_nodes")? + .context("failed to parse blacklist")?, + connect_only_to_boot_nodes: cfg.experimental.connect_only_to_boot_nodes, + ban_window: cfg.ban_window.try_into()?, + peer_expiration_duration: cfg.peer_expiration_duration.try_into()?, }, whitelist_nodes: if cfg.whitelist_nodes.is_empty() { vec![] @@ -195,7 +198,6 @@ impl NetworkConfig { .context("whitelist_nodes")? }, handshake_timeout: cfg.handshake_timeout.try_into()?, - reconnect_delay: cfg.reconnect_delay.try_into()?, monitor_peers_max_period: cfg.monitor_peers_max_period.try_into()?, max_num_peers: cfg.max_num_peers, minimum_outbound_peers: cfg.minimum_outbound_peers, @@ -204,27 +206,18 @@ impl NetworkConfig { peer_recent_time_window: cfg.peer_recent_time_window.try_into()?, safe_set_size: cfg.safe_set_size, archival_peer_connections_lower_bound: cfg.archival_peer_connections_lower_bound, - ban_window: cfg.ban_window.try_into()?, max_send_peers: 512, - peer_expiration_duration: cfg.peer_expiration_duration.try_into()?, peer_stats_period: cfg.peer_stats_period.try_into()?, ttl_account_id_router: cfg.ttl_account_id_router.try_into()?, routed_message_ttl: ROUTED_MESSAGE_TTL, max_routes_to_store: MAX_ROUTES_TO_STORE, highest_peer_horizon: HIGHEST_PEER_HORIZON, push_info_period: time::Duration::milliseconds(100), - blacklist: cfg - .blacklist - .iter() - .map(|e| e.parse()) - .collect::>() - .context("failed to parse blacklist")?, outbound_disabled: false, archive, accounts_data_broadcast_rate_limit: demux::RateLimit { qps: 0.1, burst: 1 }, features, inbound_disabled: cfg.experimental.inbound_disabled, - connect_only_to_boot_nodes: cfg.experimental.connect_only_to_boot_nodes, skip_tombstones: if cfg.experimental.skip_sending_tombstones_seconds > 0 { Some(time::Duration::seconds(cfg.experimental.skip_sending_tombstones_seconds)) } else { @@ -259,10 +252,15 @@ impl NetworkConfig { node_addr: Some(node_addr), node_key, 
validator: Some(validator), - boot_nodes: vec![], + peer_store: peer_store::Config { + boot_nodes: vec![], + blacklist: blacklist::Blacklist::default(), + ban_window: time::Duration::seconds(1), + peer_expiration_duration: time::Duration::seconds(60 * 60), + connect_only_to_boot_nodes: false, + }, whitelist_nodes: vec![], - handshake_timeout: time::Duration::seconds(60), - reconnect_delay: time::Duration::seconds(60), + handshake_timeout: time::Duration::seconds(5), monitor_peers_max_period: time::Duration::seconds(100), max_num_peers: 40, minimum_outbound_peers: 5, @@ -271,8 +269,6 @@ impl NetworkConfig { peer_recent_time_window: time::Duration::seconds(600), safe_set_size: 20, archival_peer_connections_lower_bound: 10, - ban_window: time::Duration::seconds(1), - peer_expiration_duration: time::Duration::seconds(60 * 60), max_send_peers: 512, peer_stats_period: time::Duration::seconds(5), ttl_account_id_router: time::Duration::seconds(60 * 60), @@ -280,10 +276,8 @@ impl NetworkConfig { max_routes_to_store: 1, highest_peer_horizon: 5, push_info_period: time::Duration::milliseconds(100), - blacklist: blacklist::Blacklist::default(), outbound_disabled: false, inbound_disabled: false, - connect_only_to_boot_nodes: false, archive: false, accounts_data_broadcast_rate_limit: demux::RateLimit { qps: 100., burst: 1000000 }, features: Features { enable_tier1: true }, diff --git a/chain/network/src/config_json.rs b/chain/network/src/config_json.rs index 130de190dc8..1155e3ce450 100644 --- a/chain/network/src/config_json.rs +++ b/chain/network/src/config_json.rs @@ -106,8 +106,6 @@ pub struct Config { pub archival_peer_connections_lower_bound: u32, /// Handshake timeout. pub handshake_timeout: Duration, - /// Duration before trying to reconnect to a peer. - pub reconnect_delay: Duration, /// Skip waiting for peers before starting node. pub skip_sync_wait: bool, /// Ban window for peers who misbehave. 
@@ -216,7 +214,6 @@ impl Default for Config { safe_set_size: default_safe_set_size(), archival_peer_connections_lower_bound: default_archival_peer_connections_lower_bound(), handshake_timeout: Duration::from_secs(20), - reconnect_delay: Duration::from_secs(60), skip_sync_wait: false, ban_window: Duration::from_secs(3 * 60 * 60), blacklist: vec![], diff --git a/chain/network/src/peer/peer_actor.rs b/chain/network/src/peer/peer_actor.rs index bfd066d6c2e..f17e0a4c2c4 100644 --- a/chain/network/src/peer/peer_actor.rs +++ b/chain/network/src/peer/peer_actor.rs @@ -12,14 +12,14 @@ use crate::peer_manager::network_state::NetworkState; use crate::peer_manager::peer_manager_actor::Event; use crate::private_actix::{ PeerToManagerMsg, PeerToManagerMsgResp, PeersRequest, PeersResponse, RegisterPeer, - RegisterPeerError, RegisterPeerResponse, SendMessage, Unregister, + RegisterPeerError, RegisterPeerResponse, SendMessage, }; use crate::routing::edge::verify_nonce; use crate::stats::metrics; use crate::tcp; use crate::time; use crate::types::{ - Ban, Handshake, HandshakeFailureReason, PeerIdOrHash, PeerMessage, PeerType, ReasonForBan, + Handshake, HandshakeFailureReason, PeerIdOrHash, PeerMessage, PeerType, ReasonForBan, }; use actix::fut::future::wrap_future; use actix::{Actor, ActorContext, ActorFutureExt, AsyncContext, Context, Handler, Running}; @@ -60,6 +60,9 @@ const ROUTED_MESSAGE_CACHE_SIZE: usize = 1000; /// Duplicated messages will be dropped if routed through the same peer multiple times. const DROP_DUPLICATED_MESSAGES_PERIOD: time::Duration = time::Duration::milliseconds(50); +// TODO(gprusak): this delay is unnecessary, drop it. +const WAIT_FOR_SYNC_DELAY: time::Duration = time::Duration::milliseconds(1_000); + #[derive(Debug, Clone, PartialEq, Eq)] pub struct ConnectionClosedEvent { pub(crate) stream_id: tcp::StreamId, @@ -278,14 +281,8 @@ impl PeerActor { } fn send_message(&self, msg: &PeerMessage) { - // Rate limit PeersRequest messages. 
- // TODO(gprusak): upgrade it to a more general rate limiting. if let (PeerStatus::Ready(conn), PeerMessage::PeersRequest) = (&self.peer_status, msg) { - let now = self.clock.now(); - match conn.last_time_peer_requested.load() { - Some(last) if now < last + REQUEST_PEERS_INTERVAL => return, - _ => conn.last_time_peer_requested.store(Some(now)), - } + conn.last_time_peer_requested.store(Some(self.clock.now())); } if let Some(enc) = self.encoding() { return self.send_message_with_encoding(msg, enc); @@ -587,7 +584,30 @@ impl PeerActor { incremental: false, requesting_full_sync: true, })); + // Only broadcast the new edge from the outbound endpoint. + act.network_state.tier2.broadcast_message(Arc::new(PeerMessage::SyncRoutingTable( + RoutingTableUpdate::from_edges(vec![conn.edge.clone()]), + ))); } + ctx.spawn(wrap_future(async { + tokio::time::sleep(WAIT_FOR_SYNC_DELAY.try_into().unwrap()).await; + }).map(|_,act:&mut Self,_|{ + // Sync the RoutingTable. + act.sync_routing_table(); + })); + // Exchange peers periodically. + ctx.spawn(wrap_future({ + let conn = conn.clone(); + async move { + let mut interval = + tokio::time::interval(REQUEST_PEERS_INTERVAL.try_into().unwrap()); + interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); + loop { + interval.tick().await; + conn.send_message(Arc::new(PeerMessage::PeersRequest)); + } + } + })); act.network_state.config.event_sink.push(Event::HandshakeCompleted(HandshakeCompletedEvent{ stream_id: act.stream_id, edge: conn.edge.clone(), @@ -608,6 +628,21 @@ impl PeerActor { ); } + // Send full RoutingTable. 
+ fn sync_routing_table(&self) { + let mut known_edges: Vec = + self.network_state.graph.read().edges().values().cloned().collect(); + if self.network_state.config.skip_tombstones.is_some() { + known_edges.retain(|edge| edge.removal_info().is_none()); + metrics::EDGE_TOMBSTONE_SENDING_SKIPPED.inc(); + } + let known_accounts = self.network_state.routing_table_view.get_announce_accounts(); + self.send_message_or_log(&PeerMessage::SyncRoutingTable(RoutingTableUpdate::new( + known_edges, + known_accounts, + ))); + } + fn handle_msg_connecting(&mut self, ctx: &mut actix::Context, msg: PeerMessage) { match (&mut self.peer_status, msg) { ( @@ -1021,9 +1056,7 @@ impl PeerActor { .event_sink .push(Event::MessageProcessed(PeerMessage::Routed(msg))); } - _ => { - self.receive_message(ctx, conn, PeerMessage::Routed(msg.clone())); - } + _ => self.receive_message(ctx, conn, PeerMessage::Routed(msg.clone())), } } else { if msg.decrease_ttl() { @@ -1123,28 +1156,14 @@ impl Actor for PeerActor { // so there is nothing to be done. PeerStatus::Connecting(..) => {} // Clean up the Connection from the NetworkState. - PeerStatus::Ready(conn) => { - self.network_state.unregister(conn); - // Save the fact that we are disconnecting to the PeerStore. 
- match &self.closing_reason { - Some(ClosingReason::Ban(ban_reason)) => { - warn!(target: "network", "Banning peer {} for {:?}", self.peer_info, ban_reason); - self.network_state.peer_manager_addr.do_send( - PeerToManagerMsg::Ban(Ban { - peer_id: conn.peer_info.id.clone(), - ban_reason: *ban_reason, - }) - .with_span_context(), - ); - } - _ => self.network_state.peer_manager_addr.do_send( - PeerToManagerMsg::Unregister(Unregister { - peer_id: conn.peer_info.id.clone(), - }) - .with_span_context(), - ), - } - } + PeerStatus::Ready(conn) => self.network_state.unregister( + &self.clock, + conn, + match self.closing_reason { + Some(ClosingReason::Ban(reason)) => Some(reason), + _ => None, + }, + ), } Running::Stop } diff --git a/chain/network/src/peer/testonly.rs b/chain/network/src/peer/testonly.rs index 9d0cd8a0dfc..492be82259f 100644 --- a/chain/network/src/peer/testonly.rs +++ b/chain/network/src/peer/testonly.rs @@ -8,10 +8,9 @@ use crate::network_protocol::{ use crate::peer::peer_actor::{ClosingReason, PeerActor}; use crate::peer_manager::network_state::NetworkState; use crate::peer_manager::peer_manager_actor; +use crate::peer_manager::peer_store; use crate::private_actix::{PeerRequestResult, RegisterPeerResponse, SendMessage}; use crate::private_actix::{PeerToManagerMsg, PeerToManagerMsgResp}; -use crate::routing; -use crate::routing::routing_table_view::RoutingTableView; use crate::store; use crate::tcp; use crate::testonly::actix::ActixSystem; @@ -22,7 +21,6 @@ use actix::{Actor, Context, Handler}; use near_crypto::{InMemorySigner, Signature}; use near_o11y::{handler_debug_span, OpenTelemetrySpanExt, WithSpanContext, WithSpanContextExt}; use near_primitives::network::PeerId; -use parking_lot::RwLock; use std::sync::Arc; pub struct PeerConfig { @@ -92,7 +90,6 @@ impl Handler> for FakePeerManagerActor { }) } PeerToManagerMsg::PeersResponse(..) 
=> PeerToManagerMsgResp::Empty, - PeerToManagerMsg::Unregister(_) => PeerToManagerMsgResp::Empty, _ => panic!("unsupported message"), } } @@ -162,12 +159,23 @@ impl PeerHandle { let fpm = FakePeerManagerActor { cfg: cfg.clone() }.start(); let fc = Arc::new(fake_client::Fake { event_sink: send.sink().compose(Event::Client) }); let store = store::Store::from(near_store::db::TestDB::new()); - let routing_table_view = RoutingTableView::new(store.clone(), cfg.id()); + let mut network_cfg = cfg.network.clone(); + network_cfg.event_sink = send.sink().compose(Event::Network); + let network_state = Arc::new(NetworkState::new( + &clock, + store.clone(), + peer_store::PeerStore::new(&clock, network_cfg.peer_store.clone(), store.clone()) + .unwrap(), + Arc::new(network_cfg.verify().unwrap()), + cfg.chain.genesis_id.clone(), + fc, + fpm.recipient(), + )); // WARNING: this is a hack to make PeerActor use a specific nonce if let (Some(nonce), tcp::StreamType::Outbound { peer_id }) = (&cfg.nonce, &stream.type_) { - routing_table_view.add_local_edges(&[Edge::new( + network_state.routing_table_view.add_local_edges(&[Edge::new( cfg.id(), peer_id.clone(), nonce - 1, @@ -175,20 +183,6 @@ impl PeerHandle { Signature::default(), )]); } - let mut network_cfg = cfg.network.clone(); - network_cfg.event_sink = send.sink().compose(Event::Network); - let network_graph = - Arc::new(RwLock::new(routing::GraphWithCache::new(network_cfg.node_id().clone()))); - let routing_table_addr = - routing::Actor::spawn(clock.clone(), store.clone(), network_graph.clone()); - let network_state = Arc::new(NetworkState::new( - Arc::new(network_cfg.verify().unwrap()), - cfg.chain.genesis_id.clone(), - fc, - fpm.recipient(), - routing_table_addr, - routing_table_view, - )); PeerActor::spawn(clock, stream, cfg.force_encoding, network_state).unwrap() }) .await; diff --git a/chain/network/src/peer/tests/communication.rs b/chain/network/src/peer/tests/communication.rs index 3e65a225d95..3d0de0f0a58 100644 --- 
a/chain/network/src/peer/tests/communication.rs +++ b/chain/network/src/peer/tests/communication.rs @@ -51,13 +51,13 @@ async fn test_peer_communication( } }; - // RequestUpdateNonce + tracing::info!(target:"test","RequestUpdateNonce"); let mut events = inbound.events.from_now(); let want = PeerMessage::RequestUpdateNonce(data::make_partial_edge(&mut rng)); outbound.send(want.clone()).await; events.recv_until(message_processed(want)).await; - // ReponseUpdateNonce + tracing::info!(target:"test","ReponseUpdateNonce"); let mut events = inbound.events.from_now(); let a = data::make_signer(&mut rng); let b = data::make_signer(&mut rng); @@ -65,45 +65,45 @@ async fn test_peer_communication( outbound.send(want.clone()).await; events.recv_until(message_processed(want)).await; - // PeersRequest -> PeersResponse + tracing::info!(target:"test","PeersRequest -> PeersResponse"); // This test is different from the rest, because we cannot skip sending the response back. let mut events = outbound.events.from_now(); let want = PeerMessage::PeersResponse(inbound.cfg.peers.clone()); outbound.send(PeerMessage::PeersRequest).await; events.recv_until(message_processed(want)).await; - // BlockRequest + tracing::info!(target:"test","BlockRequest"); let mut events = inbound.events.from_now(); let want = PeerMessage::BlockRequest(chain.blocks[5].hash().clone()); outbound.send(want.clone()).await; events.recv_until(message_processed(want)).await; - // Block + tracing::info!(target:"test","Block"); let mut events = inbound.events.from_now(); let want = PeerMessage::Block(chain.blocks[5].clone()); outbound.send(want.clone()).await; events.recv_until(message_processed(want)).await; - // BlockHeadersRequest + tracing::info!(target:"test","BlockHeadersRequest"); let mut events = inbound.events.from_now(); let want = PeerMessage::BlockHeadersRequest(chain.blocks.iter().map(|b| b.hash().clone()).collect()); outbound.send(want.clone()).await; events.recv_until(message_processed(want)).await; - // 
BlockHeaders + tracing::info!(target:"test","BlockHeaders"); let mut events = inbound.events.from_now(); let want = PeerMessage::BlockHeaders(chain.get_block_headers()); outbound.send(want.clone()).await; events.recv_until(message_processed(want)).await; - // SyncRoutingTable + tracing::info!(target:"test","SyncRoutingTable"); let mut events = inbound.events.from_now(); let want = PeerMessage::SyncRoutingTable(data::make_routing_table(&mut rng)); outbound.send(want.clone()).await; events.recv_until(message_processed(want)).await; - // PartialEncodedChunkRequest + tracing::info!(target:"test","PartialEncodedChunkRequest"); let mut events = inbound.events.from_now(); let want = PeerMessage::Routed(Box::new(outbound.routed_message( RoutedMessageBody::PartialEncodedChunkRequest(PartialEncodedChunkRequestMsg { @@ -118,7 +118,7 @@ async fn test_peer_communication( outbound.send(want.clone()).await; events.recv_until(message_processed(want)).await; - // PartialEncodedChunkResponse + tracing::info!(target:"test","PartialEncodedChunkResponse"); let mut events = inbound.events.from_now(); let want_hash = chain.blocks[3].chunks()[0].chunk_hash(); let want_parts = data::make_chunk_parts(chain.chunks[&want_hash].clone()); @@ -135,14 +135,14 @@ async fn test_peer_communication( outbound.send(want.clone()).await; events.recv_until(message_processed(want)).await; - // Transaction + tracing::info!(target:"test","Transaction"); let mut events = inbound.events.from_now(); let want = data::make_signed_transaction(&mut rng); let want = PeerMessage::Transaction(want); outbound.send(want.clone()).await; events.recv_until(message_processed(want)).await; - // Challenge + tracing::info!(target:"test","Challenge"); let mut events = inbound.events.from_now(); let want = PeerMessage::Challenge(data::make_challenge(&mut rng)); outbound.send(want.clone()).await; diff --git a/chain/network/src/peer_manager/network_state.rs b/chain/network/src/peer_manager/network_state.rs index 
430bcdf4ebe..608e6b241e0 100644 --- a/chain/network/src/peer_manager/network_state.rs +++ b/chain/network/src/peer_manager/network_state.rs @@ -6,13 +6,15 @@ use crate::network_protocol::{ RoutedMessageBody, RoutedMessageV2, RoutingTableUpdate, }; use crate::peer_manager::connection; +use crate::peer_manager::peer_store; use crate::private_actix::{PeerToManagerMsg, ValidateEdgeList}; use crate::routing; use crate::routing::edge_validator_actor::EdgeValidatorHelper; use crate::routing::routing_table_view::RoutingTableView; use crate::stats::metrics; +use crate::store; use crate::time; -use crate::types::ChainInfo; +use crate::types::{ChainInfo, ReasonForBan}; use actix::Recipient; use arc_swap::ArcSwap; use near_o11y::{WithSpanContext, WithSpanContextExt}; @@ -20,6 +22,7 @@ use near_primitives::block::GenesisId; use near_primitives::hash::CryptoHash; use near_primitives::network::{AnnounceAccount, PeerId}; use near_primitives::types::AccountId; +use parking_lot::RwLock; use std::sync::atomic::AtomicUsize; use std::sync::Arc; use tracing::{debug, trace}; @@ -50,6 +53,10 @@ pub(crate) struct NetworkState { pub tier2: connection::Pool, /// Semaphore limiting inflight inbound handshakes. pub inbound_handshake_permits: Arc, + /// Peer store that provides read/write access to peers. + pub peer_store: peer_store::PeerStore, + /// A graph of the whole NEAR network. + pub graph: Arc>, /// View of the Routing table. 
It keeps: /// - routing information - how to route messages @@ -67,37 +74,33 @@ pub(crate) struct NetworkState { impl NetworkState { pub fn new( + clock: &time::Clock, + store: store::Store, + peer_store: peer_store::PeerStore, config: Arc, genesis_id: GenesisId, client: Arc, peer_manager_addr: Recipient>, - routing_table_addr: actix::Addr, - routing_table_view: RoutingTableView, ) -> Self { + let graph = Arc::new(RwLock::new(routing::GraphWithCache::new(config.node_id()))); Self { - routing_table_addr, + routing_table_addr: routing::Actor::spawn(clock.clone(), store.clone(), graph.clone()), + graph, genesis_id, client, peer_manager_addr, chain_info: Default::default(), tier2: connection::Pool::new(config.node_id()), inbound_handshake_permits: Arc::new(tokio::sync::Semaphore::new(LIMIT_PENDING_PEERS)), + peer_store, accounts_data: Arc::new(accounts_data::Cache::new()), - routing_table_view, + routing_table_view: RoutingTableView::new(store, config.node_id()), routing_table_exchange_helper: Default::default(), config, txns_since_last_block: AtomicUsize::new(0), } } - /// Query connected peers for more peers. - pub fn ask_for_more_peers(&self) { - let msg = Arc::new(PeerMessage::PeersRequest); - for peer in self.tier2.load().ready.values() { - peer.send_message(msg.clone()); - } - } - pub fn propose_edge(&self, peer1: &PeerId, with_nonce: Option) -> PartialEdgeInfo { // When we create a new edge we increase the latest nonce by 2 in case we miss a removal // proposal from our partner. @@ -107,9 +110,31 @@ impl NetworkState { PartialEdgeInfo::new(&self.config.node_id(), peer1, nonce, &self.config.node_key) } + /// Stops peer instance if it is still connected, + /// and then mark peer as banned in the peer store. 
+ pub fn disconnect_and_ban( + &self, + clock: &time::Clock, + peer_id: &PeerId, + ban_reason: ReasonForBan, + ) { + let tier2 = self.tier2.load(); + if let Some(peer) = tier2.ready.get(peer_id) { + peer.stop(Some(ban_reason)); + } else { + if let Err(err) = self.peer_store.peer_ban(clock, peer_id, ban_reason) { + tracing::error!(target: "network", ?err, "Failed to save peer data"); + } + } + } + /// Removes the connection from the state. - // TODO(gprusak): move PeerManagerActor::unregister logic here as well. - pub fn unregister(&self, conn: &Arc) { + pub fn unregister( + &self, + clock: &time::Clock, + conn: &Arc, + ban_reason: Option, + ) { let peer_id = conn.peer_info.id.clone(); self.tier2.remove(&peer_id); @@ -124,6 +149,15 @@ impl NetworkState { ))); } } + + // Save the fact that we are disconnecting to the PeerStore. + let res = match ban_reason { + Some(ban_reason) => self.peer_store.peer_ban(&clock, &conn.peer_info.id, ban_reason), + None => self.peer_store.peer_disconnected(clock, &conn.peer_info.id), + }; + if let Err(err) = res { + tracing::error!(target: "network", ?err, "Failed to save peer data"); + } } /// Determine if the given target is referring to us. 
diff --git a/chain/network/src/peer_manager/peer_manager_actor.rs b/chain/network/src/peer_manager/peer_manager_actor.rs index 1d6e4e43c6c..e3d2d9901ae 100644 --- a/chain/network/src/peer_manager/peer_manager_actor.rs +++ b/chain/network/src/peer_manager/peer_manager_actor.rs @@ -9,21 +9,20 @@ use crate::network_protocol::{ use crate::peer::peer_actor::PeerActor; use crate::peer_manager::connection; use crate::peer_manager::network_state::NetworkState; -use crate::peer_manager::peer_store::PeerStore; +use crate::peer_manager::peer_store; use crate::private_actix::{ PeerRequestResult, PeersRequest, RegisterPeer, RegisterPeerError, RegisterPeerResponse, StopMsg, }; use crate::private_actix::{PeerToManagerMsg, PeerToManagerMsgResp, PeersResponse}; use crate::routing; -use crate::routing::routing_table_view::RoutingTableView; use crate::stats::metrics; use crate::store; use crate::tcp; use crate::time; use crate::types::{ - ConnectedPeerInfo, FullPeerInfo, GetNetworkInfo, KnownPeerStatus, KnownProducer, NetworkInfo, - NetworkRequests, NetworkResponses, PeerIdOrHash, PeerManagerMessageRequest, - PeerManagerMessageResponse, PeerType, ReasonForBan, SetChainInfo, + ConnectedPeerInfo, FullPeerInfo, GetNetworkInfo, KnownProducer, NetworkInfo, NetworkRequests, + NetworkResponses, PeerIdOrHash, PeerManagerMessageRequest, PeerManagerMessageResponse, + PeerType, ReasonForBan, SetChainInfo, }; use actix::fut::future::wrap_future; use actix::{ @@ -35,10 +34,9 @@ use anyhow::Context as _; use near_o11y::{handler_trace_span, OpenTelemetrySpanExt, WithSpanContext, WithSpanContextExt}; use near_performance_metrics_macros::perf; use near_primitives::block::GenesisId; -use near_primitives::network::{AnnounceAccount, PeerId}; +use near_primitives::network::PeerId; use near_primitives::types::AccountId; use near_primitives::views::{KnownPeerStateView, PeerStoreView}; -use parking_lot::RwLock; use rand::seq::IteratorRandom; use rand::thread_rng; use std::cmp::min; @@ -63,8 +61,6 @@ const 
MONITOR_PEERS_INITIAL_DURATION: time::Duration = time::Duration::milliseco const BROADCAST_VALIDATED_EDGES_INTERVAL: time::Duration = time::Duration::milliseconds(50); /// Maximum amount of time spend processing edges. const BROAD_CAST_EDGES_MAX_WORK_ALLOWED: time::Duration = time::Duration::milliseconds(50); -/// Delay syncinc for 1 second to avoid race condition -const WAIT_FOR_SYNC_DELAY: time::Duration = time::Duration::milliseconds(1_000); /// How often should we update the routing table const UPDATE_ROUTING_TABLE_INTERVAL: time::Duration = time::Duration::milliseconds(1_000); /// How often to report bandwidth stats. @@ -115,7 +111,7 @@ impl TryFrom<&PeerInfo> for WhitelistNode { /// Actor that manages peers connections. pub struct PeerManagerActor { - clock: time::Clock, + pub(crate) clock: time::Clock, /// Networking configuration. /// TODO(gprusak): this field is duplicated with /// NetworkState.config. Remove it from here. @@ -128,14 +124,6 @@ pub struct PeerManagerActor { max_num_peers: u32, /// Peer information for this node. my_peer_id: PeerId, - /// Peer store that provides read/write access to peers. - pub(crate) peer_store: PeerStore, - /// A graph of the whole NEAR network, shared between routing::Actor - /// and PeerManagerActor. PeerManagerActor should have read-only access to the graph. - /// TODO: this is an intermediate step towards replacing actix runtime with a - /// generic threadpool (or multiple pools) in the near-network crate. - /// It the threadpool setup, inevitably some of the state will be shared. - network_graph: Arc>, /// Flag that track whether we started attempts to establish outbound connections. started_connect_attempts: bool, /// Connected peers we have sent new edge update, but we haven't received response so far. 
@@ -268,25 +256,17 @@ impl PeerManagerActor { ) -> anyhow::Result> { let config = config.verify().context("config")?; let store = store::Store::from(store); - let peer_store = PeerStore::new( - &clock, - store.clone(), - &config.boot_nodes, - config.blacklist.clone(), - config.connect_only_to_boot_nodes, - ) - .map_err(|e| anyhow::Error::msg(e.to_string()))?; - debug!(target: "network", + let peer_store = + peer_store::PeerStore::new(&clock, config.peer_store.clone(), store.clone()) + .context("PeerStore::new")?; + tracing::debug!(target: "network", len = peer_store.len(), - boot_nodes = config.boot_nodes.len(), + boot_nodes = config.peer_store.boot_nodes.len(), banned = peer_store.count_banned(), "Found known peers"); - debug!(target: "network", blacklist = ?config.blacklist, "Blacklist"); + tracing::debug!(target: "network", blacklist = ?config.peer_store.blacklist, "Blacklist"); let my_peer_id = config.node_id(); - let network_graph = Arc::new(RwLock::new(routing::GraphWithCache::new(my_peer_id.clone()))); - let routing_table_addr = - routing::Actor::spawn(clock.clone(), store.clone(), network_graph.clone()); let whitelist_nodes = { let mut v = vec![]; for wn in &config.whitelist_nodes { @@ -299,19 +279,18 @@ impl PeerManagerActor { my_peer_id: my_peer_id.clone(), config: config.clone(), max_num_peers: config.max_num_peers, - peer_store, - network_graph, started_connect_attempts: false, local_peer_pending_update_nonce_request: HashMap::new(), whitelist_nodes, last_peer_outbound_attempt: Default::default(), state: Arc::new(NetworkState::new( + &clock, + store.clone(), + peer_store, config.clone(), genesis_id, client, ctx.address().recipient(), - routing_table_addr, - RoutingTableView::new(store, my_peer_id.clone()), )), clock, })) @@ -341,7 +320,7 @@ impl PeerManagerActor { }) => { act.state.routing_table_view.update(&pruned_edges, next_hops.clone()); for peer in peers_to_ban { - act.try_ban_peer(&peer, ReasonForBan::InvalidEdge); + 
act.state.disconnect_and_ban(&act.clock, &peer, ReasonForBan::InvalidEdge); } act.config .event_sink @@ -352,16 +331,6 @@ impl PeerManagerActor { .spawn(ctx); } - fn broadcast_accounts(&mut self, accounts: Vec) { - let new_accounts = self.state.routing_table_view.add_accounts(accounts); - debug!(target: "network", account_id = ?self.config.validator.as_ref().map(|v|v.account_id()), ?new_accounts, "Received new accounts"); - if new_accounts.len() > 0 { - self.state.tier2.broadcast_message(Arc::new(PeerMessage::SyncRoutingTable( - RoutingTableUpdate::from_accounts(new_accounts), - ))); - } - } - /// `update_routing_table_trigger` schedule updating routing table to `RoutingTableActor` /// Usually we do edge pruning once per hour. However it may be disabled in following cases: /// - there are edges, that were supposed to be added, but are still in EdgeValidatorActor, @@ -533,70 +502,19 @@ impl PeerManagerActor { fn register_peer( &mut self, connection: Arc, - ctx: &mut Context, ) -> Result<(), connection::PoolError> { let peer_info = &connection.peer_info; let _span = tracing::trace_span!(target: "network", "register_peer").entered(); debug!(target: "network", ?peer_info, "Consolidated connection"); self.state.tier2.insert_ready(connection.clone())?; // Best effort write to DB. - if let Err(err) = self.peer_store.peer_connected(&self.clock, peer_info) { + if let Err(err) = self.state.peer_store.peer_connected(&self.clock, peer_info) { error!(target: "network", ?err, "Failed to save peer data"); } self.state.add_verified_edges_to_routing_table(vec![connection.edge.clone()]); - self.sync_after_handshake(connection.clone(), ctx); Ok(()) } - fn sync_after_handshake(&self, peer: Arc, ctx: &mut Context) { - let run_later_span = tracing::trace_span!(target: "network", "sync_after_handshake"); - // The full sync is delayed, so that handshake is completed before the sync starts. 
- near_performance_metrics::actix::run_later( - ctx, - WAIT_FOR_SYNC_DELAY.try_into().unwrap(), - move |act, _ctx| { - let _guard = run_later_span.enter(); - // Start syncing network point of view. Wait until both parties are connected before start - // sending messages. - let mut known_edges: Vec = - act.network_graph.read().edges().values().cloned().collect(); - if act.config.skip_tombstones.is_some() { - known_edges.retain(|edge| edge.removal_info().is_none()); - metrics::EDGE_TOMBSTONE_SENDING_SKIPPED.inc(); - } - let known_accounts = act.state.routing_table_view.get_announce_accounts(); - peer.send_message(Arc::new(PeerMessage::SyncRoutingTable( - RoutingTableUpdate::new(known_edges, known_accounts), - ))); - - // Ask for peers list on connection. - peer.send_message(Arc::new(PeerMessage::PeersRequest)); - - if peer.peer_type == PeerType::Outbound { - // Only broadcast new message from the outbound endpoint. - // Wait a time out before broadcasting this new edge to let the other party finish handshake. - act.state.tier2.broadcast_message(Arc::new(PeerMessage::SyncRoutingTable( - RoutingTableUpdate::from_edges(vec![peer.edge.clone()]), - ))); - } - }, - ); - } - - /// Ban peer. Stop peer instance if it is still connected, - /// and then mark peer as banned in the peer store. - pub(crate) fn try_ban_peer(&mut self, peer_id: &PeerId, ban_reason: ReasonForBan) { - let state = self.state.clone(); - if let Some(peer) = state.tier2.load().ready.get(peer_id) { - peer.stop(Some(ban_reason)); - } else { - warn!(target: "network", ?ban_reason, ?peer_id, "Try to ban a disconnected peer for"); - if let Err(err) = self.peer_store.peer_ban(&self.clock, peer_id, ban_reason) { - tracing::error!(target: "network", ?err, "Failed to save peer data"); - }; - }; - } - /// Check if it is needed to create a new outbound connection. 
/// If the number of active connections is less than `ideal_connections_lo` or /// (the number of outgoing connections is less than `minimum_outbound_peers` @@ -853,26 +771,12 @@ impl PeerManagerActor { let _span = tracing::trace_span!(target: "network", "monitor_peers_trigger").entered(); let _timer = metrics::PEER_MANAGER_TRIGGER_TIME.with_label_values(&["monitor_peers"]).start_timer(); - let mut to_unban = vec![]; - for (peer_id, peer_state) in self.peer_store.iter() { - if let KnownPeerStatus::Banned(_, last_banned) = peer_state.status { - let interval = self.clock.now_utc() - last_banned; - if interval > self.config.ban_window { - info!(target: "network", unbanned = ?peer_id, after = ?interval, "Monitor peers:"); - to_unban.push(peer_id.clone()); - } - } - } - for peer_id in to_unban { - if let Err(err) = self.peer_store.peer_unban(&peer_id) { - error!(target: "network", ?err, "Failed to unban a peer"); - } - } + self.state.peer_store.unban(&self.clock); if self.is_outbound_bootstrap_needed() { let tier2 = self.state.tier2.load(); - if let Some(peer_info) = self.peer_store.unconnected_peer(|peer_state| { + if let Some(peer_info) = self.state.peer_store.unconnected_peer(|peer_state| { // Ignore connecting to ourself self.my_peer_id == peer_state.peer_info.id || self.config.node_addr == peer_state.peer_info.addr @@ -898,22 +802,20 @@ impl PeerManagerActor { } } })); - } else { - self.state.ask_for_more_peers(); } } // If there are too many active connections try to remove some connections self.maybe_stop_active_connection(); - if let Err(err) = self.peer_store.remove_expired(&self.clock, &self.config) { + if let Err(err) = self.state.peer_store.remove_expired(&self.clock) { error!(target: "network", ?err, "Failed to remove expired peers"); }; // Find peers that are not reliable (too much behind) - and make sure that we're not routing messages through them. 
let unreliable_peers = self.unreliable_peers(); metrics::PEER_UNRELIABLE.set(unreliable_peers.len() as i64); - self.network_graph.write().set_unreliable_peers(unreliable_peers); + self.state.graph.write().set_unreliable_peers(unreliable_peers); let new_interval = min(max_interval, interval * EXPONENTIAL_BACKOFF_RATIO); @@ -1097,11 +999,11 @@ impl PeerManagerActor { } } NetworkRequests::BanPeer { peer_id, ban_reason } => { - self.try_ban_peer(&peer_id, ban_reason); + self.state.disconnect_and_ban(&self.clock, &peer_id, ban_reason); NetworkResponses::NoResponse } NetworkRequests::AnnounceAccount(announce_account) => { - self.broadcast_accounts(vec![announce_account]); + self.state.broadcast_accounts(vec![announce_account]); NetworkResponses::NoResponse } NetworkRequests::PartialEncodedChunkRequest { target, request, create_time } => { @@ -1240,21 +1142,17 @@ impl PeerManagerActor { } #[perf] - fn handle_msg_register_peer( - &mut self, - msg: RegisterPeer, - ctx: &mut Context, - ) -> RegisterPeerResponse { + fn handle_msg_register_peer(&mut self, msg: RegisterPeer) -> RegisterPeerResponse { let _d = delay_detector::DelayDetector::new(|| "consolidate".into()); let peer_info = &msg.connection.peer_info; // Check if this is a blacklisted peer. 
- if peer_info.addr.as_ref().map_or(true, |addr| self.peer_store.is_blacklisted(addr)) { + if peer_info.addr.as_ref().map_or(true, |addr| self.state.peer_store.is_blacklisted(addr)) { debug!(target: "network", peer_info = ?peer_info, "Dropping connection from blacklisted peer or unknown address"); return RegisterPeerResponse::Reject(RegisterPeerError::Blacklisted); } - if self.peer_store.is_banned(&peer_info.id) { + if self.state.peer_store.is_banned(&peer_info.id) { debug!(target: "network", id = ?peer_info.id, "Dropping connection from banned peer"); return RegisterPeerResponse::Reject(RegisterPeerError::Banned); } @@ -1270,7 +1168,7 @@ impl PeerManagerActor { return RegisterPeerResponse::Reject(RegisterPeerError::ConnectionLimitExceeded); } } - if let Err(err) = self.register_peer(msg.connection.clone(), ctx) { + if let Err(err) = self.register_peer(msg.connection.clone()) { return RegisterPeerResponse::Reject(RegisterPeerError::PoolError(err)); } RegisterPeerResponse::Accept @@ -1280,13 +1178,13 @@ impl PeerManagerActor { fn handle_msg_peers_request(&self, _msg: PeersRequest) -> PeerRequestResult { let _d = delay_detector::DelayDetector::new(|| "peers request".into()); PeerRequestResult { - peers: self.peer_store.healthy_peers(self.config.max_send_peers as usize), + peers: self.state.peer_store.healthy_peers(self.config.max_send_peers as usize), } } fn handle_msg_peers_response(&mut self, msg: PeersResponse) { let _d = delay_detector::DelayDetector::new(|| "peers response".into()); - if let Err(err) = self.peer_store.add_indirect_peers( + if let Err(err) = self.state.peer_store.add_indirect_peers( &self.clock, msg.peers.into_iter().filter(|peer_info| peer_info.id != self.my_peer_id), ) { @@ -1333,14 +1231,10 @@ impl PeerManagerActor { } } - fn handle_peer_to_manager_msg( - &mut self, - msg: PeerToManagerMsg, - ctx: &mut Context, - ) -> PeerToManagerMsgResp { + fn handle_peer_to_manager_msg(&mut self, msg: PeerToManagerMsg) -> PeerToManagerMsgResp { match msg { 
PeerToManagerMsg::RegisterPeer(msg) => { - PeerToManagerMsgResp::RegisterPeer(self.handle_msg_register_peer(msg, ctx)) + PeerToManagerMsgResp::RegisterPeer(self.handle_msg_register_peer(msg)) } PeerToManagerMsg::PeersRequest(msg) => { PeerToManagerMsgResp::PeersRequest(self.handle_msg_peers_request(msg)) @@ -1350,21 +1244,11 @@ impl PeerManagerActor { PeerToManagerMsgResp::Empty } PeerToManagerMsg::UpdatePeerInfo(peer_info) => { - if let Err(err) = self.peer_store.add_direct_peer(&self.clock, peer_info) { + if let Err(err) = self.state.peer_store.add_direct_peer(&self.clock, peer_info) { error!(target: "network", ?err, "Fail to update peer store"); } PeerToManagerMsgResp::Empty } - PeerToManagerMsg::Unregister(msg) => { - if let Err(err) = self.peer_store.peer_disconnected(&self.clock, &msg.peer_id) { - tracing::error!(target: "network", ?err, "Failed to save peer data"); - } - PeerToManagerMsgResp::Empty - } - PeerToManagerMsg::Ban(msg) => { - self.try_ban_peer(&msg.peer_id, msg.ban_reason); - PeerToManagerMsgResp::Empty - } PeerToManagerMsg::RequestUpdateNonce(peer_id, edge_info) => { if Edge::partial_verify(&self.my_peer_id, &peer_id, &edge_info) { if let Some(cur_edge) = self.state.routing_table_view.get_local_edge(&peer_id) { @@ -1539,13 +1423,13 @@ impl Handler> for PeerManagerActor { fn handle( &mut self, msg: WithSpanContext, - ctx: &mut Self::Context, + _ctx: &mut Self::Context, ) -> Self::Result { let msg_type: &str = (&msg.msg).into(); let (_span, msg) = handler_trace_span!(target: "network", msg, msg_type); let _timer = metrics::PEER_MANAGER_MESSAGES_TIME.with_label_values(&[msg_type]).start_timer(); - self.handle_peer_to_manager_msg(msg, ctx) + self.handle_peer_to_manager_msg(msg) } } @@ -1570,7 +1454,9 @@ impl Handler for PeerManagerActor { match msg { GetDebugStatus::PeerStore => { let mut peer_states_view = self + .state .peer_store + .load() .iter() .map(|(peer_id, known_peer_state)| KnownPeerStateView { peer_id: peer_id.clone(), diff --git 
a/chain/network/src/peer_manager/peer_store/mod.rs b/chain/network/src/peer_manager/peer_store/mod.rs index c4c73185838..fc12e5e30ff 100644 --- a/chain/network/src/peer_manager/peer_store/mod.rs +++ b/chain/network/src/peer_manager/peer_store/mod.rs @@ -1,25 +1,25 @@ -use crate::blacklist::Blacklist; -use crate::config; +use crate::blacklist; use crate::network_protocol::PeerInfo; use crate::store; use crate::time; use crate::types::{KnownPeerState, KnownPeerStatus, ReasonForBan}; use anyhow::bail; +use im::hashmap::Entry; +use im::{HashMap, HashSet}; use near_primitives::network::PeerId; +use parking_lot::Mutex; use rand::seq::IteratorRandom; use rand::thread_rng; -use std::collections::hash_map::{Entry, Iter}; -use std::collections::HashMap; -use std::collections::HashSet; use std::net::SocketAddr; use std::ops::Not; -use tracing::{debug, error, info}; +#[cfg(test)] +mod testonly; #[cfg(test)] mod tests; /// Level of trust we have about a new (PeerId, Addr) pair. -#[derive(Eq, PartialEq, Debug, Clone)] +#[derive(Eq, PartialEq, Debug, Clone, Copy)] enum TrustLevel { /// We learn about it from other peers. Indirect, @@ -44,28 +44,197 @@ impl VerifiedPeer { } } +#[derive(Clone)] +pub struct Config { + /// A list of nodes to connect to on the first run of the neard server. + /// Once it connects to some of them, the server will learn about other + /// nodes in the network and will try to connect to them as well. + /// Sever will also store in DB the info about the nodes it learned about, + /// so that on the next run it has a larger choice of nodes to connect + /// to (rather than just the boot nodes). + /// + /// The recommended boot nodes are distributed together with the config.json + /// file, but you can modify the boot_nodes field to contain any nodes that + /// you trust. + pub boot_nodes: Vec, + /// Nodes will not accept or try to establish connection to such peers. + pub blacklist: blacklist::Blacklist, + /// If true - connect only to the bootnodes. 
+ pub connect_only_to_boot_nodes: bool, + /// Remove expired peers. + pub peer_expiration_duration: time::Duration, + /// Duration of the ban for misbehaving peers. + pub ban_window: time::Duration, +} + /// Known peers store, maintaining cache of known peers and connection to storage to save/load them. -pub struct PeerStore { +struct Inner { + config: Config, store: store::Store, + boot_nodes: HashSet, peer_states: HashMap, // This is a reverse index, from physical address to peer_id // It can happens that some peers don't have known address, so // they will not be present in this list, otherwise they will be present. addr_peers: HashMap, - blacklist: Blacklist, - boot_nodes: HashSet, - connect_only_to_boot_nodes: bool, } +impl Inner { + /// Adds a peer which proved to have secret key associated with the ID. + /// + /// The host have sent us a message signed with a secret key corresponding + /// to the peer ID thus we can be sure that they control the secret key. + /// + /// See also [`Self::add_indirect_peers`] and [`Self::add_direct_peer`]. + fn add_signed_peer(&mut self, clock: &time::Clock, peer_info: PeerInfo) -> anyhow::Result<()> { + self.add_peer(clock, peer_info, TrustLevel::Signed) + } + + /// Adds a peer into the store with given trust level. + fn add_peer( + &mut self, + clock: &time::Clock, + peer_info: PeerInfo, + trust_level: TrustLevel, + ) -> anyhow::Result<()> { + if let Some(peer_addr) = peer_info.addr { + match trust_level { + TrustLevel::Signed => { + self.update_peer_info(clock, peer_info, peer_addr, TrustLevel::Signed)?; + } + TrustLevel::Direct => { + // If this peer already exists with a signed connection ignore this update. + // Warning: This is a problem for nodes that changes its address without changing peer_id. + // It is recommended to change peer_id if address is changed. 
+ let trust_level = (|| { + let state = self.peer_states.get(&peer_info.id)?; + let addr = state.peer_info.addr?; + let verified_peer = self.addr_peers.get(&addr)?; + Some(verified_peer.trust_level) + })(); + if trust_level == Some(TrustLevel::Signed) { + return Ok(()); + } + self.update_peer_info(clock, peer_info, peer_addr, TrustLevel::Direct)?; + } + TrustLevel::Indirect => { + // We should only update an Indirect connection if we don't know anything about the peer + // or about the address. + if !self.peer_states.contains_key(&peer_info.id) + && !self.addr_peers.contains_key(&peer_addr) + { + self.update_peer_info(clock, peer_info, peer_addr, TrustLevel::Indirect)?; + } + } + } + } else { + // If doesn't have the address attached it is not verified and we add it + // only if it is unknown to us. + self.peer_states + .entry(peer_info.id.clone()) + .or_insert_with(|| KnownPeerState::new(peer_info, clock.now_utc())); + } + Ok(()) + } + + /// Copies the in-mem state of the peer to DB. + fn touch(&mut self, peer_id: &PeerId) -> anyhow::Result<()> { + Ok(match self.peer_states.get(peer_id) { + Some(peer_state) => self.store.set_peer_state(&peer_id, peer_state)?, + None => (), + }) + } + + fn peer_unban(&mut self, peer_id: &PeerId) -> anyhow::Result<()> { + if let Some(peer_state) = self.peer_states.get_mut(peer_id) { + peer_state.status = KnownPeerStatus::NotConnected; + self.store.set_peer_state(&peer_id, peer_state)?; + } else { + bail!("Peer {} is missing in the peer store", peer_id); + } + Ok(()) + } + + /// Deletes peers from the internal cache and the persistent store. + fn delete_peers(&mut self, peer_ids: &[PeerId]) -> anyhow::Result<()> { + for peer_id in peer_ids { + if let Some(peer_state) = self.peer_states.remove(peer_id) { + if let Some(addr) = peer_state.peer_info.addr { + self.addr_peers.remove(&addr); + } + } + } + Ok(self.store.delete_peer_states(peer_ids)?) + } + + /// Find a random subset of peers based on filter. 
+ fn find_peers(&self, filter: F, count: usize) -> Vec + where + F: FnMut(&&KnownPeerState) -> bool, + { + (self.peer_states.values()) + .filter(filter) + .choose_multiple(&mut thread_rng(), count) + .into_iter() + .map(|kps| kps.peer_info.clone()) + .collect() + } + + /// Create new pair between peer_info.id and peer_addr removing + /// old pairs if necessary. + fn update_peer_info( + &mut self, + clock: &time::Clock, + peer_info: PeerInfo, + peer_addr: SocketAddr, + trust_level: TrustLevel, + ) -> anyhow::Result<()> { + let mut touch_other = None; + + // If there is a peer associated with current address remove the address from it. + if let Some(verified_peer) = self.addr_peers.remove(&peer_addr) { + self.peer_states.entry(verified_peer.peer_id).and_modify(|peer_state| { + peer_state.peer_info.addr = None; + touch_other = Some(peer_state.peer_info.id.clone()); + }); + } + + // If this peer already has an address, remove that pair from the index. + if let Some(peer_state) = self.peer_states.get_mut(&peer_info.id) { + if let Some(cur_addr) = peer_state.peer_info.addr.take() { + self.addr_peers.remove(&cur_addr); + } + } + + // Add new address + self.addr_peers + .insert(peer_addr, VerifiedPeer { peer_id: peer_info.id.clone(), trust_level }); + + let now = clock.now_utc(); + + // Update peer_id addr + self.peer_states + .entry(peer_info.id.clone()) + .and_modify(|peer_state| peer_state.peer_info.addr = Some(peer_addr)) + .or_insert_with(|| KnownPeerState::new(peer_info.clone(), now)); + + self.touch(&peer_info.id)?; + if let Some(touch_other) = touch_other { + self.touch(&touch_other)?; + } + Ok(()) + } +} + +pub(crate) struct PeerStore(Mutex); + impl PeerStore { pub(crate) fn new( clock: &time::Clock, + config: Config, store: store::Store, - boot_nodes: &[PeerInfo], - blacklist: Blacklist, - connect_only_to_boot_nodes: bool, ) -> anyhow::Result { - let boot_nodes_set: HashSet = boot_nodes.iter().map(|it| it.id.clone()).collect(); + let boot_nodes: HashSet<_> = 
config.boot_nodes.iter().map(|p| p.id.clone()).collect(); // A mapping from `PeerId` to `KnownPeerState`. let mut peerid_2_state = HashMap::default(); // Stores mapping from `SocketAddr` to `VerifiedPeer`, which contains `PeerId`. @@ -74,9 +243,9 @@ impl PeerStore { let mut addr_2_peer = HashMap::default(); let now = clock.now_utc(); - for peer_info in boot_nodes { + for peer_info in &config.boot_nodes { if peerid_2_state.contains_key(&peer_info.id) { - error!(id = ?peer_info.id, "There is a duplicated peer in boot_nodes"); + tracing::error!(id = ?peer_info.id, "There is a duplicated peer in boot_nodes"); continue; } let peer_addr = match peer_info.addr { @@ -100,7 +269,7 @@ impl PeerStore { for (peer_id, peer_state) in store.list_peer_states()? { // If it’s already banned, keep it banned. Otherwise, it’s not connected. let status = if peer_state.status.is_banned() { - if connect_only_to_boot_nodes && boot_nodes_set.contains(&peer_id) { + if config.connect_only_to_boot_nodes && boot_nodes.contains(&peer_id) { // Give boot node another chance. 
KnownPeerStatus::NotConnected } else { @@ -118,9 +287,9 @@ impl PeerStore { }; let is_blacklisted = - peer_state.peer_info.addr.map_or(false, |addr| blacklist.contains(addr)); + peer_state.peer_info.addr.map_or(false, |addr| config.blacklist.contains(addr)); if is_blacklisted { - info!(target: "network", "Removing {:?} because address is blacklisted", peer_state.peer_info); + tracing::info!(target: "network", "Removing {:?} because address is blacklisted", peer_state.peer_info); peers_to_delete.push(peer_id); } else { peers_to_keep.push((peer_id, peer_state)); @@ -151,57 +320,58 @@ impl PeerStore { } } - let mut peer_store = PeerStore { + let mut peer_store = Inner { + config, store, + boot_nodes, peer_states: peerid_2_state, addr_peers: addr_2_peer, - blacklist, - boot_nodes: boot_nodes_set, - connect_only_to_boot_nodes, }; peer_store.delete_peers(&peers_to_delete)?; - Ok(peer_store) + Ok(PeerStore(Mutex::new(peer_store))) } pub fn is_blacklisted(&self, addr: &SocketAddr) -> bool { - self.blacklist.contains(*addr) + self.0.lock().config.blacklist.contains(*addr) } pub(crate) fn len(&self) -> usize { - self.peer_states.len() + self.0.lock().peer_states.len() } pub(crate) fn is_banned(&self, peer_id: &PeerId) -> bool { - self.peer_states - .get(peer_id) - .map_or(false, |known_peer_state| known_peer_state.status.is_banned()) + self.0.lock().peer_states.get(peer_id).map_or(false, |s| s.status.is_banned()) } pub(crate) fn count_banned(&self) -> usize { - self.peer_states.values().filter(|st| st.status.is_banned()).count() + self.0.lock().peer_states.values().filter(|st| st.status.is_banned()).count() } pub(crate) fn peer_connected( - &mut self, + &self, clock: &time::Clock, peer_info: &PeerInfo, ) -> anyhow::Result<()> { - self.add_signed_peer(clock, peer_info.clone())?; - let entry = self.peer_states.get_mut(&peer_info.id).unwrap(); + let mut inner = self.0.lock(); + inner.add_signed_peer(clock, peer_info.clone())?; + let mut store = inner.store.clone(); + let entry 
= inner.peer_states.get_mut(&peer_info.id).unwrap(); entry.last_seen = clock.now_utc(); entry.status = KnownPeerStatus::Connected; - Ok(self.store.set_peer_state(&peer_info.id, entry)?) + Ok(store.set_peer_state(&peer_info.id, entry)?) } pub(crate) fn peer_disconnected( - &mut self, + &self, clock: &time::Clock, peer_id: &PeerId, ) -> anyhow::Result<()> { - if let Some(peer_state) = self.peer_states.get_mut(peer_id) { + let mut inner = self.0.lock(); + let mut store = inner.store.clone(); + if let Some(peer_state) = inner.peer_states.get_mut(peer_id) { peer_state.last_seen = clock.now_utc(); peer_state.status = KnownPeerStatus::NotConnected; - self.store.set_peer_state(peer_id, peer_state)?; + store.set_peer_state(peer_id, peer_state)?; } else { bail!("Peer {} is missing in the peer store", peer_id); } @@ -209,70 +379,39 @@ impl PeerStore { } pub(crate) fn peer_ban( - &mut self, + &self, clock: &time::Clock, peer_id: &PeerId, ban_reason: ReasonForBan, ) -> anyhow::Result<()> { - if let Some(peer_state) = self.peer_states.get_mut(peer_id) { + tracing::warn!(target: "network", "Banning peer {} for {:?}", peer_id, ban_reason); + let mut inner = self.0.lock(); + let mut store = inner.store.clone(); + if let Some(peer_state) = inner.peer_states.get_mut(peer_id) { let now = clock.now_utc(); peer_state.last_seen = now; peer_state.status = KnownPeerStatus::Banned(ban_reason, now); - self.store.set_peer_state(peer_id, peer_state)?; + store.set_peer_state(peer_id, peer_state)?; } else { bail!("Peer {} is missing in the peer store", peer_id); } Ok(()) } - /// Deletes peers from the internal cache and the persistent store. - fn delete_peers(&mut self, peer_ids: &[PeerId]) -> anyhow::Result<()> { - for peer_id in peer_ids { - if let Some(peer_state) = self.peer_states.remove(peer_id) { - if let Some(addr) = peer_state.peer_info.addr { - self.addr_peers.remove(&addr); - } - } - } - Ok(self.store.delete_peer_states(peer_ids)?) 
- } - - pub(crate) fn peer_unban(&mut self, peer_id: &PeerId) -> anyhow::Result<()> { - if let Some(peer_state) = self.peer_states.get_mut(peer_id) { - peer_state.status = KnownPeerStatus::NotConnected; - self.store.set_peer_state(&peer_id, peer_state)?; - } else { - bail!("Peer {} is missing in the peer store", peer_id); - } - Ok(()) - } - - /// Find a random subset of peers based on filter. - fn find_peers(&self, filter: F, count: usize) -> Vec - where - F: FnMut(&&KnownPeerState) -> bool, - { - (self.peer_states.values()) - .filter(filter) - .choose_multiple(&mut thread_rng(), count) - .into_iter() - .map(|kps| kps.peer_info.clone()) - .collect() - } - /// Return unconnected or peers with unknown status that we can try to connect to. /// Peers with unknown addresses are filtered out. pub(crate) fn unconnected_peer( &self, ignore_fn: impl Fn(&KnownPeerState) -> bool, ) -> Option { - self.find_peers( + let inner = self.0.lock(); + inner.find_peers( |p| { (p.status == KnownPeerStatus::NotConnected || p.status == KnownPeerStatus::Unknown) && !ignore_fn(p) && p.peer_info.addr.is_some() - // if we're connecting only to the bood nodes - filter out the nodes that are not bootnodes. - && (!self.connect_only_to_boot_nodes || self.boot_nodes.contains(&p.peer_info.id)) + // If we're connecting only to the boot nodes - filter out the nodes that are not boot nodes. + && (!inner.config.connect_only_to_boot_nodes || inner.boot_nodes.contains(&p.peer_info.id)) }, 1, ) @@ -282,137 +421,26 @@ impl PeerStore { /// Return healthy known peers up to given amount. pub(crate) fn healthy_peers(&self, max_count: usize) -> Vec { - self.find_peers(|p| matches!(p.status, KnownPeerStatus::Banned(_, _)).not(), max_count) - } - - /// Return iterator over all known peers. 
- pub(crate) fn iter(&self) -> Iter<'_, PeerId, KnownPeerState> { - self.peer_states.iter() + self.0 + .lock() + .find_peers(|p| matches!(p.status, KnownPeerStatus::Banned(_, _)).not(), max_count) } /// Removes peers that are not responding for expiration period. - pub(crate) fn remove_expired( - &mut self, - clock: &time::Clock, - config: &config::NetworkConfig, - ) -> anyhow::Result<()> { + pub(crate) fn remove_expired(&self, clock: &time::Clock) -> anyhow::Result<()> { + let mut inner = self.0.lock(); let now = clock.now_utc(); let mut to_remove = vec![]; - for (peer_id, peer_status) in self.peer_states.iter() { + for (peer_id, peer_status) in inner.peer_states.iter() { let diff = now - peer_status.last_seen; if peer_status.status != KnownPeerStatus::Connected - && diff > config.peer_expiration_duration + && diff > inner.config.peer_expiration_duration { - debug!(target: "network", "Removing peer: last seen {:?}", diff); + tracing::debug!(target: "network", "Removing peer: last seen {:?} ago", diff); to_remove.push(peer_id.clone()); } } - self.delete_peers(&to_remove) - } - - /// Copies the in-mem state of the peer to DB. - fn touch(&mut self, peer_id: &PeerId) -> anyhow::Result<()> { - Ok(match self.peer_states.get(peer_id) { - Some(peer_state) => self.store.set_peer_state(&peer_id, peer_state)?, - None => (), - }) - } - - /// Create new pair between peer_info.id and peer_addr removing - /// old pairs if necessary. - fn update_peer_info( - &mut self, - clock: &time::Clock, - peer_info: PeerInfo, - peer_addr: SocketAddr, - trust_level: TrustLevel, - ) -> anyhow::Result<()> { - let mut touch_other = None; - - // If there is a peer associated with current address remove the address from it. 
- if let Some(verified_peer) = self.addr_peers.remove(&peer_addr) { - self.peer_states.entry(verified_peer.peer_id).and_modify(|peer_state| { - peer_state.peer_info.addr = None; - touch_other = Some(peer_state.peer_info.id.clone()); - }); - } - - // If this peer already has an address, remove that pair from the index. - if let Some(peer_state) = self.peer_states.get_mut(&peer_info.id) { - if let Some(cur_addr) = peer_state.peer_info.addr.take() { - self.addr_peers.remove(&cur_addr); - } - } - - // Add new address - self.addr_peers - .insert(peer_addr, VerifiedPeer { peer_id: peer_info.id.clone(), trust_level }); - - let now = clock.now_utc(); - - // Update peer_id addr - self.peer_states - .entry(peer_info.id.clone()) - .and_modify(|peer_state| peer_state.peer_info.addr = Some(peer_addr)) - .or_insert_with(|| KnownPeerState::new(peer_info.clone(), now)); - - self.touch(&peer_info.id)?; - if let Some(touch_other) = touch_other { - self.touch(&touch_other)?; - } - Ok(()) - } - - /// Adds a peer into the store with given trust level. - #[inline(always)] - fn add_peer( - &mut self, - clock: &time::Clock, - peer_info: PeerInfo, - trust_level: TrustLevel, - ) -> anyhow::Result<()> { - if let Some(peer_addr) = peer_info.addr { - match trust_level { - TrustLevel::Signed => { - self.update_peer_info(clock, peer_info, peer_addr, TrustLevel::Signed)?; - } - TrustLevel::Direct => { - // If this peer already exists with a signed connection ignore this update. - // Warning: This is a problem for nodes that changes its address without changing peer_id. - // It is recommended to change peer_id if address is changed. 
- let is_peer_trusted = - self.peer_states.get(&peer_info.id).map_or(false, |peer_state| { - peer_state.peer_info.addr.map_or(false, |current_addr| { - self.addr_peers.get(&current_addr).map_or(false, |verified_peer| { - verified_peer.trust_level == TrustLevel::Signed - }) - }) - }); - if is_peer_trusted { - return Ok(()); - } - - self.update_peer_info(clock, peer_info, peer_addr, TrustLevel::Direct)?; - } - TrustLevel::Indirect => { - // We should only update an Indirect connection if we don't know anything about the peer - // or about the address. - if !self.peer_states.contains_key(&peer_info.id) - && !self.addr_peers.contains_key(&peer_addr) - { - self.update_peer_info(clock, peer_info, peer_addr, TrustLevel::Indirect)?; - } - } - } - } else { - // If doesn't have the address attached it is not verified and we add it - // only if it is unknown to us. - if !self.peer_states.contains_key(&peer_info.id) { - self.peer_states - .insert(peer_info.id.clone(), KnownPeerState::new(peer_info, clock.now_utc())); - } - } - Ok(()) + inner.delete_peers(&to_remove) } /// Adds peers we’ve learned about from other peers. @@ -423,23 +451,25 @@ impl PeerStore { /// /// See also [`Self::add_direct_peer`] and [`Self::add_signed_peer`]. 
pub(crate) fn add_indirect_peers( - &mut self, + &self, clock: &time::Clock, peers: impl Iterator, ) -> Result<(), Box> { + let mut inner = self.0.lock(); let mut total: usize = 0; let mut blacklisted: usize = 0; for peer_info in peers { total += 1; - let is_blacklisted = peer_info.addr.map_or(false, |addr| self.blacklist.contains(addr)); + let is_blacklisted = + peer_info.addr.map_or(false, |addr| inner.config.blacklist.contains(addr)); if is_blacklisted { blacklisted += 1; } else { - self.add_peer(clock, peer_info, TrustLevel::Indirect)?; + inner.add_peer(clock, peer_info, TrustLevel::Indirect)?; } } if blacklisted != 0 { - info!(target: "network", "Ignored {} blacklisted peers out of {} indirect peer(s)", + tracing::info!(target: "network", "Ignored {} blacklisted peers out of {} indirect peer(s)", blacklisted, total); } Ok(()) @@ -453,25 +483,35 @@ impl PeerStore { /// /// See also [`Self::add_indirect_peers`] and [`Self::add_signed_peer`]. pub(crate) fn add_direct_peer( - &mut self, + &self, clock: &time::Clock, peer_info: PeerInfo, ) -> anyhow::Result<()> { - self.add_peer(clock, peer_info, TrustLevel::Direct) + self.0.lock().add_peer(clock, peer_info, TrustLevel::Direct) } - /// Adds a peer which proved to have secret key associated with the ID. - /// - /// The host have sent us a message signed with a secret key corresponding - /// to the peer ID thus we can be sure that they control the secret key. - /// - /// See also [`Self::add_indirect_peers`] and [`Self::add_direct_peer`]. 
- pub(crate) fn add_signed_peer( - &mut self, - clock: &time::Clock, - peer_info: PeerInfo, - ) -> anyhow::Result<()> { - self.add_peer(clock, peer_info, TrustLevel::Signed) + pub fn unban(&self, clock: &time::Clock) { + let mut inner = self.0.lock(); + let now = clock.now_utc(); + let mut to_unban = vec![]; + for (peer_id, peer_state) in &inner.peer_states { + if let KnownPeerStatus::Banned(_, ban_time) = peer_state.status { + if now < ban_time + inner.config.ban_window { + continue; + } + tracing::info!(target: "network", unbanned = ?peer_id, ?ban_time, "unbanning a peer"); + to_unban.push(peer_id.clone()); + } + } + for peer_id in &to_unban { + if let Err(err) = inner.peer_unban(&peer_id) { + tracing::error!(target: "network", ?err, "Failed to unban a peer"); + } + } + } + + pub fn load(&self) -> HashMap { + self.0.lock().peer_states.clone() } } diff --git a/chain/network/src/peer_manager/peer_store/testonly.rs b/chain/network/src/peer_manager/peer_store/testonly.rs new file mode 100644 index 00000000000..ec450d4084e --- /dev/null +++ b/chain/network/src/peer_manager/peer_store/testonly.rs @@ -0,0 +1,7 @@ +use crate::types::KnownPeerState; + +impl super::PeerStore { + pub fn dump(&self) -> Vec { + self.0.lock().peer_states.values().cloned().collect() + } +} diff --git a/chain/network/src/peer_manager/peer_store/tests.rs b/chain/network/src/peer_manager/peer_store/tests.rs index fe53b352b91..45101e6039a 100644 --- a/chain/network/src/peer_manager/peer_store/tests.rs +++ b/chain/network/src/peer_manager/peer_store/tests.rs @@ -1,5 +1,6 @@ use super::*; -use crate::blacklist; +use crate::blacklist::Blacklist; +use crate::time; use near_crypto::{KeyType, SecretKey}; use near_store::{NodeStorage, StoreOpener}; use std::collections::HashSet; @@ -25,6 +26,20 @@ fn gen_peer_info(port: u16) -> PeerInfo { } } +fn make_config( + boot_nodes: &[PeerInfo], + blacklist: blacklist::Blacklist, + connect_only_to_boot_nodes: bool, +) -> Config { + Config { + boot_nodes: 
boot_nodes.iter().cloned().collect(), + blacklist, + connect_only_to_boot_nodes, + ban_window: time::Duration::seconds(1), + peer_expiration_duration: time::Duration::days(1000), + } +} + #[test] fn ban_store() { let clock = time::FakeClock::default(); @@ -34,17 +49,24 @@ fn ban_store() { let boot_nodes = vec![peer_info_a, peer_info_to_ban.clone()]; { let store = store::Store::from(opener.open().unwrap()); - let mut peer_store = - PeerStore::new(&clock.clock(), store, &boot_nodes, Default::default(), false).unwrap(); + let peer_store = PeerStore::new( + &clock.clock(), + make_config(&boot_nodes, Blacklist::default(), false), + store, + ) + .unwrap(); assert_eq!(peer_store.healthy_peers(3).len(), 2); peer_store.peer_ban(&clock.clock(), &peer_info_to_ban.id, ReasonForBan::Abusive).unwrap(); assert_eq!(peer_store.healthy_peers(3).len(), 1); } { let store_new = store::Store::from(opener.open().unwrap()); - let peer_store_new = - PeerStore::new(&clock.clock(), store_new, &boot_nodes, Default::default(), false) - .unwrap(); + let peer_store_new = PeerStore::new( + &clock.clock(), + make_config(&boot_nodes, Blacklist::default(), false), + store_new, + ) + .unwrap(); assert_eq!(peer_store_new.healthy_peers(3).len(), 1); } } @@ -57,8 +79,12 @@ fn test_unconnected_peer() { let boot_nodes = vec![peer_info_a, peer_info_to_ban]; { let store = store::Store::from(near_store::db::TestDB::new()); - let peer_store = - PeerStore::new(&clock.clock(), store, &boot_nodes, Default::default(), false).unwrap(); + let peer_store = PeerStore::new( + &clock.clock(), + make_config(&boot_nodes, Blacklist::default(), false), + store, + ) + .unwrap(); assert!(peer_store.unconnected_peer(|_| false).is_some()); assert!(peer_store.unconnected_peer(|_| true).is_none()); } @@ -76,9 +102,13 @@ fn test_unconnected_peer_only_boot_nodes() { // we should connect to peer_in_store { let store = store::Store::from(near_store::db::TestDB::new()); - let mut peer_store = - PeerStore::new(&clock.clock(), store, 
&boot_nodes, Default::default(), false).unwrap(); - peer_store.add_peer(&clock.clock(), peer_in_store.clone(), TrustLevel::Direct).unwrap(); + let peer_store = PeerStore::new( + &clock.clock(), + make_config(&boot_nodes, Blacklist::default(), false), + store, + ) + .unwrap(); + peer_store.add_direct_peer(&clock.clock(), peer_in_store.clone()).unwrap(); peer_store.peer_connected(&clock.clock(), &peer_info_a).unwrap(); assert_eq!(peer_store.unconnected_peer(|_| false), Some(peer_in_store.clone())); } @@ -88,9 +118,13 @@ fn test_unconnected_peer_only_boot_nodes() { // connect to only boot nodes is enabled - we should not find any peer to connect to. { let store = store::Store::from(near_store::db::TestDB::new()); - let mut peer_store = - PeerStore::new(&clock.clock(), store, &boot_nodes, Default::default(), true).unwrap(); - peer_store.add_peer(&clock.clock(), peer_in_store.clone(), TrustLevel::Direct).unwrap(); + let peer_store = PeerStore::new( + &clock.clock(), + make_config(&boot_nodes, Default::default(), true), + store, + ) + .unwrap(); + peer_store.add_direct_peer(&clock.clock(), peer_in_store.clone()).unwrap(); peer_store.peer_connected(&clock.clock(), &peer_info_a).unwrap(); assert_eq!(peer_store.unconnected_peer(|_| false), None); } @@ -99,15 +133,13 @@ fn test_unconnected_peer_only_boot_nodes() { // we should connect to it - no matter what the setting is. 
for connect_to_boot_nodes in [true, false] { let store = store::Store::from(near_store::db::TestDB::new()); - let mut peer_store = PeerStore::new( + let peer_store = PeerStore::new( &clock.clock(), + make_config(&boot_nodes, Default::default(), connect_to_boot_nodes), store, - &boot_nodes, - Default::default(), - connect_to_boot_nodes, ) .unwrap(); - peer_store.add_peer(&clock.clock(), peer_info_a.clone(), TrustLevel::Direct).unwrap(); + peer_store.add_direct_peer(&clock.clock(), peer_info_a.clone()).unwrap(); assert_eq!(peer_store.unconnected_peer(|_| false), Some(peer_info_a.clone())); } } @@ -117,11 +149,12 @@ fn check_exist( peer_id: &PeerId, addr_level: Option<(SocketAddr, TrustLevel)>, ) -> bool { - if let Some(peer_info) = peer_store.peer_states.get(peer_id) { + let inner = peer_store.0.lock(); + if let Some(peer_info) = inner.peer_states.get(peer_id) { let peer_info = &peer_info.peer_info; if let Some((addr, level)) = addr_level { peer_info.addr.map_or(false, |cur_addr| cur_addr == addr) - && peer_store + && inner .addr_peers .get(&addr) .map_or(false, |verified| verified.trust_level == level) @@ -134,15 +167,16 @@ fn check_exist( } fn check_integrity(peer_store: &PeerStore) -> bool { - peer_store.peer_states.clone().iter().all(|(k, v)| { + let inner = peer_store.0.lock(); + inner.peer_states.clone().iter().all(|(k, v)| { if let Some(addr) = v.peer_info.addr { - if peer_store.addr_peers.get(&addr).map_or(true, |value| value.peer_id != *k) { + if inner.addr_peers.get(&addr).map_or(true, |value| value.peer_id != *k) { return false; } } true - }) && peer_store.addr_peers.clone().iter().all(|(k, v)| { - !peer_store + }) && inner.addr_peers.clone().iter().all(|(k, v)| { + !inner .peer_states .get(&v.peer_id) .map_or(true, |value| value.peer_info.addr.map_or(true, |addr| addr != *k)) @@ -155,8 +189,8 @@ fn check_integrity(peer_store: &PeerStore) -> bool { fn handle_peer_id_change() { let clock = time::FakeClock::default(); let store = 
store::Store::from(near_store::db::TestDB::new()); - let mut peer_store = - PeerStore::new(&clock.clock(), store, &[], Default::default(), false).unwrap(); + let peer_store = + PeerStore::new(&clock.clock(), make_config(&[], Default::default(), false), store).unwrap(); let peers_id = (0..2).map(|ix| get_peer_id(format!("node{}", ix))).collect::>(); let addr = get_addr(0); @@ -166,7 +200,7 @@ fn handle_peer_id_change() { assert!(check_exist(&peer_store, &peers_id[0], Some((addr, TrustLevel::Signed)))); let peer_ba = get_peer_info(peers_id[1].clone(), Some(addr)); - peer_store.add_peer(&clock.clock(), peer_ba, TrustLevel::Direct).unwrap(); + peer_store.add_direct_peer(&clock.clock(), peer_ba).unwrap(); assert!(check_exist(&peer_store, &peers_id[0], None)); assert!(check_exist(&peer_store, &peers_id[1], Some((addr, TrustLevel::Direct)))); @@ -180,8 +214,8 @@ fn handle_peer_id_change() { fn dont_handle_address_change() { let clock = time::FakeClock::default(); let store = store::Store::from(near_store::db::TestDB::new()); - let mut peer_store = - PeerStore::new(&clock.clock(), store, &[], Default::default(), false).unwrap(); + let peer_store = + PeerStore::new(&clock.clock(), make_config(&[], Default::default(), false), store).unwrap(); let peers_id = (0..1).map(|ix| get_peer_id(format!("node{}", ix))).collect::>(); let addrs = (0..2).map(get_addr).collect::>(); @@ -191,7 +225,7 @@ fn dont_handle_address_change() { assert!(check_exist(&peer_store, &peers_id[0], Some((addrs[0], TrustLevel::Signed)))); let peer_ba = get_peer_info(peers_id[0].clone(), Some(addrs[1])); - peer_store.add_peer(&clock.clock(), peer_ba, TrustLevel::Direct).unwrap(); + peer_store.add_direct_peer(&clock.clock(), peer_ba).unwrap(); assert!(check_exist(&peer_store, &peers_id[0], Some((addrs[0], TrustLevel::Signed)))); assert!(check_integrity(&peer_store)); } @@ -200,8 +234,9 @@ fn dont_handle_address_change() { fn check_add_peers_overriding() { let clock = time::FakeClock::default(); let store = 
store::Store::from(near_store::db::TestDB::new()); - let mut peer_store = - PeerStore::new(&clock.clock(), store.clone(), &[], Default::default(), false).unwrap(); + let peer_store = + PeerStore::new(&clock.clock(), make_config(&[], Default::default(), false), store.clone()) + .unwrap(); // Five peers: A, B, C, D, X, T let peers_id = (0..6).map(|ix| get_peer_id(format!("node{}", ix))).collect::>(); @@ -216,7 +251,7 @@ fn check_add_peers_overriding() { // Create direct connection B - #B let peer_11 = get_peer_info(peers_id[1].clone(), Some(addrs[1])); - peer_store.add_peer(&clock.clock(), peer_11.clone(), TrustLevel::Direct).unwrap(); + peer_store.add_direct_peer(&clock.clock(), peer_11.clone()).unwrap(); assert!(check_exist(&peer_store, &peers_id[1], Some((addrs[1], TrustLevel::Direct)))); assert!(check_integrity(&peer_store)); @@ -227,7 +262,7 @@ fn check_add_peers_overriding() { // Create indirect connection C - #C let peer_22 = get_peer_info(peers_id[2].clone(), Some(addrs[2])); - peer_store.add_peer(&clock.clock(), peer_22.clone(), TrustLevel::Indirect).unwrap(); + peer_store.add_indirect_peers(&clock.clock(), [peer_22.clone()].into_iter()).unwrap(); assert!(check_exist(&peer_store, &peers_id[2], Some((addrs[2], TrustLevel::Indirect)))); assert!(check_integrity(&peer_store)); @@ -246,24 +281,24 @@ fn check_add_peers_overriding() { // Create indirect connection D - #D let peer_33 = get_peer_info(peers_id[3].clone(), Some(addrs[3])); - peer_store.add_peer(&clock.clock(), peer_33, TrustLevel::Indirect).unwrap(); + peer_store.add_indirect_peers(&clock.clock(), [peer_33].into_iter()).unwrap(); assert!(check_exist(&peer_store, &peers_id[3], Some((addrs[3], TrustLevel::Indirect)))); assert!(check_integrity(&peer_store)); // Try to create indirect connection A - #X but fails since A - #A exists let peer_04 = get_peer_info(peers_id[0].clone(), Some(addrs[4])); - peer_store.add_peer(&clock.clock(), peer_04, TrustLevel::Indirect).unwrap(); + 
peer_store.add_indirect_peers(&clock.clock(), [peer_04].into_iter()).unwrap(); assert!(check_exist(&peer_store, &peers_id[0], Some((addrs[0], TrustLevel::Signed)))); assert!(check_integrity(&peer_store)); // Try to create indirect connection X - #D but fails since D - #D exists let peer_43 = get_peer_info(peers_id[4].clone(), Some(addrs[3])); - peer_store.add_peer(&clock.clock(), peer_43.clone(), TrustLevel::Indirect).unwrap(); + peer_store.add_indirect_peers(&clock.clock(), [peer_43.clone()].into_iter()).unwrap(); assert!(check_exist(&peer_store, &peers_id[3], Some((addrs[3], TrustLevel::Indirect)))); assert!(check_integrity(&peer_store)); // Create Direct connection X - #D and succeed removing connection D - #D - peer_store.add_peer(&clock.clock(), peer_43, TrustLevel::Direct).unwrap(); + peer_store.add_direct_peer(&clock.clock(), peer_43).unwrap(); assert!(check_exist(&peer_store, &peers_id[4], Some((addrs[3], TrustLevel::Direct)))); // D should still exist, but without any addr assert!(check_exist(&peer_store, &peers_id[3], None)); @@ -271,13 +306,13 @@ fn check_add_peers_overriding() { // Try to create indirect connection A - #T but fails since A - #A (signed) exists let peer_05 = get_peer_info(peers_id[0].clone(), Some(addrs[5])); - peer_store.add_peer(&clock.clock(), peer_05, TrustLevel::Direct).unwrap(); + peer_store.add_direct_peer(&clock.clock(), peer_05).unwrap(); assert!(check_exist(&peer_store, &peers_id[0], Some((addrs[0], TrustLevel::Signed)))); assert!(check_integrity(&peer_store)); // Check we are able to recover from store previous signed connection let peer_store_2 = - PeerStore::new(&clock.clock(), store, &[], Default::default(), false).unwrap(); + PeerStore::new(&clock.clock(), make_config(&[], Default::default(), false), store).unwrap(); assert!(check_exist(&peer_store_2, &peers_id[0], Some((addrs[0], TrustLevel::Indirect)))); assert!(check_integrity(&peer_store_2)); } @@ -288,8 +323,9 @@ fn check_ignore_blacklisted_peers() { #[track_caller] 
fn assert_peers(peer_store: &PeerStore, expected: &[&PeerId]) { + let inner = peer_store.0.lock(); let expected: HashSet<&PeerId> = HashSet::from_iter(expected.iter().cloned()); - let got = HashSet::from_iter(peer_store.peer_states.keys()); + let got = HashSet::from_iter(inner.peer_states.keys()); assert_eq!(expected, got); } @@ -298,8 +334,12 @@ fn check_ignore_blacklisted_peers() { // Populate store with three peers. { - let mut peer_store = - PeerStore::new(&clock.clock(), store.clone(), &[], Default::default(), false).unwrap(); + let peer_store = PeerStore::new( + &clock.clock(), + make_config(&[], Default::default(), false), + store.clone(), + ) + .unwrap(); peer_store .add_indirect_peers( &clock.clock(), @@ -317,8 +357,12 @@ fn check_ignore_blacklisted_peers() { // Peers without address aren’t saved but make sure the rest are read // correctly. { - let peer_store = - PeerStore::new(&clock.clock(), store.clone(), &[], Default::default(), false).unwrap(); + let peer_store = PeerStore::new( + &clock.clock(), + make_config(&[], Default::default(), false), + store.clone(), + ) + .unwrap(); assert_peers(&peer_store, &[&ids[1], &ids[2]]); } @@ -326,7 +370,8 @@ fn check_ignore_blacklisted_peers() { { let blacklist: blacklist::Blacklist = ["127.0.0.1:2", "127.0.0.1:5"].iter().map(|e| e.parse().unwrap()).collect(); - let mut peer_store = PeerStore::new(&clock.clock(), store, &[], blacklist, false).unwrap(); + let peer_store = + PeerStore::new(&clock.clock(), make_config(&[], blacklist, false), store).unwrap(); // Peer 127.0.0.1:2 is removed since it's blacklisted. assert_peers(&peer_store, &[&ids[1]]); @@ -361,8 +406,9 @@ fn remove_blacklisted_peers_from_store() { // Add three peers. 
{ let store = store::Store::from(opener.open().unwrap()); - let mut peer_store = - PeerStore::new(&clock.clock(), store, &[], Default::default(), false).unwrap(); + let peer_store = + PeerStore::new(&clock.clock(), make_config(&[], Default::default(), false), store) + .unwrap(); peer_store.add_indirect_peers(&clock.clock(), peer_infos.clone().into_iter()).unwrap(); } assert_peers_in_store(&opener, &peer_ids); @@ -372,7 +418,8 @@ fn remove_blacklisted_peers_from_store() { let store = store::Store::from(opener.open().unwrap()); let blacklist: blacklist::Blacklist = [blacklist::Entry::from_addr(peer_infos[2].addr.unwrap())].into_iter().collect(); - let _peer_store = PeerStore::new(&clock.clock(), store, &[], blacklist, false).unwrap(); + let _peer_store = + PeerStore::new(&clock.clock(), make_config(&[], blacklist, false), store).unwrap(); } assert_peers_in_store(&opener, &peer_ids[0..2]); } @@ -391,12 +438,13 @@ fn assert_peers_in_cache( expected_peers: &[PeerId], expected_addresses: &[SocketAddr], ) { + let inner = peer_store.0.lock(); let expected_peers: HashSet<&PeerId> = HashSet::from_iter(expected_peers); - let cached_peers = HashSet::from_iter(peer_store.peer_states.keys()); + let cached_peers = HashSet::from_iter(inner.peer_states.keys()); assert_eq!(expected_peers, cached_peers); let expected_addresses: HashSet<&SocketAddr> = HashSet::from_iter(expected_addresses); - let cached_addresses = HashSet::from_iter(peer_store.addr_peers.keys()); + let cached_addresses = HashSet::from_iter(inner.addr_peers.keys()); assert_eq!(expected_addresses, cached_addresses); } @@ -416,18 +464,20 @@ fn test_delete_peers() { { let store = store::Store::from(opener.open().unwrap()); - let mut peer_store = - PeerStore::new(&clock.clock(), store, &[], Default::default(), false).unwrap(); + let peer_store = + PeerStore::new(&clock.clock(), make_config(&[], Default::default(), false), store) + .unwrap(); peer_store.add_indirect_peers(&clock.clock(), peer_infos.into_iter()).unwrap(); } 
assert_peers_in_store(&opener, &peer_ids); { let store = store::Store::from(opener.open().unwrap()); - let mut peer_store = - PeerStore::new(&clock.clock(), store, &[], Default::default(), false).unwrap(); + let peer_store = + PeerStore::new(&clock.clock(), make_config(&[], Default::default(), false), store) + .unwrap(); assert_peers_in_cache(&peer_store, &peer_ids, &peer_addresses); - peer_store.delete_peers(&peer_ids).unwrap(); + peer_store.0.lock().delete_peers(&peer_ids).unwrap(); assert_peers_in_cache(&peer_store, &[], &[]); } assert_peers_in_store(&opener, &[]); diff --git a/chain/network/src/peer_manager/testonly.rs b/chain/network/src/peer_manager/testonly.rs index 4652a48a26a..6649290c27d 100644 --- a/chain/network/src/peer_manager/testonly.rs +++ b/chain/network/src/peer_manager/testonly.rs @@ -8,11 +8,13 @@ use crate::peer; use crate::peer::peer_actor::ClosingReason; use crate::peer_manager::peer_manager_actor::Event as PME; use crate::tcp; +use crate::test_utils; use crate::testonly::actix::ActixSystem; use crate::testonly::fake_client; use crate::time; use crate::types::{ - ChainInfo, GetNetworkInfo, KnownPeerStatus, PeerManagerMessageRequest, SetChainInfo, + ChainInfo, GetNetworkInfo, KnownPeerStatus, PeerManagerMessageRequest, + PeerManagerMessageResponse, SetChainInfo, }; use crate::PeerManagerActor; use near_o11y::{WithSpanContext, WithSpanContextExt}; @@ -34,11 +36,13 @@ impl actix::Handler> for PeerManagerActor { // Check that the set of ready connections matches the PeerStore state. 
let tier2: HashSet<_> = self.state.tier2.load().ready.keys().cloned().collect(); let store: HashSet<_> = self + .state .peer_store - .iter() - .filter_map(|(peer_id, state)| { + .dump() + .into_iter() + .filter_map(|state| { if state.status == KnownPeerStatus::Connected { - Some(peer_id.clone()) + Some(state.peer_info.id) } else { None } @@ -272,6 +276,32 @@ impl ActorHandler { self.events.recv_until(unwrap_sync_accounts_data_processed).await; } } + + // Awaits until the routing_table matches `want`. + pub async fn wait_for_routing_table(&self, want: &[(PeerId, Vec)]) { + let mut events = self.events.from_now(); + loop { + let resp = self + .actix + .addr + .send(PeerManagerMessageRequest::FetchRoutingTable.with_span_context()) + .await + .unwrap(); + let got = match resp { + PeerManagerMessageResponse::FetchRoutingTable(rt) => rt.next_hops, + _ => panic!("bad response"), + }; + if test_utils::expected_routing_tables(&got, want) { + return; + } + events + .recv_until(|ev| match ev { + Event::PeerManager(PME::RoutingTableUpdate { .. }) => Some(()), + _ => None, + }) + .await; + } + } } pub(crate) async fn start( diff --git a/chain/network/src/peer_manager/tests.rs b/chain/network/src/peer_manager/tests.rs index 5f409c77374..3d1ac76e966 100644 --- a/chain/network/src/peer_manager/tests.rs +++ b/chain/network/src/peer_manager/tests.rs @@ -21,6 +21,8 @@ use crate::types::{PeerMessage, RoutingTableUpdate}; use itertools::Itertools; use near_o11y::testonly::init_test_logger; use near_primitives::version::PROTOCOL_VERSION; +use near_store::db::TestDB; +use peer_manager::testonly::start as start_pm; use pretty_assertions::assert_eq; use rand::seq::SliceRandom as _; use rand::Rng as _; @@ -159,6 +161,7 @@ async fn no_edge_broadcast_after_restart() { // Receive the initial sync, which will consist just of the current edge: // - the disconnected edges from the previous iterations are not loaded yet. // - the local edges weren't stored at all. 
+ tracing::info!(target: "test", "wait_for_edges()"); wait_for_edges(&mut peer, &[edge.clone()].into()).await; // Create a bunch of fresh unreachable edges, then send all the edges created so far. @@ -179,9 +182,11 @@ async fn no_edge_broadcast_after_restart() { .await; // Wait for the fresh edges to be broadcasted back. + tracing::info!(target: "test", "wait_for_edges()"); wait_for_edges(&mut peer, &fresh_edges).await; // Wait for all the disconnected edges created so far to be saved to storage. + tracing::info!(target: "test", "wait for pruning"); let mut pruned = HashSet::new(); while pruned != total_edges { match events.recv().await { @@ -682,3 +687,56 @@ async fn loop_connection() { reason ); } + +#[tokio::test] +async fn square() { + init_test_logger(); + let mut rng = make_rng(921853233); + let rng = &mut rng; + let mut clock = time::FakeClock::default(); + let chain = Arc::new(data::Chain::make(&mut clock, rng, 10)); + + tracing::info!(target:"test", "connect 4 nodes in a square"); + let pm0 = start_pm(clock.clock(), TestDB::new(), chain.make_config(rng), chain.clone()).await; + let pm1 = start_pm(clock.clock(), TestDB::new(), chain.make_config(rng), chain.clone()).await; + let pm2 = start_pm(clock.clock(), TestDB::new(), chain.make_config(rng), chain.clone()).await; + let pm3 = start_pm(clock.clock(), TestDB::new(), chain.make_config(rng), chain.clone()).await; + pm0.connect_to(&pm1.peer_info()).await; + pm1.connect_to(&pm2.peer_info()).await; + pm2.connect_to(&pm3.peer_info()).await; + pm3.connect_to(&pm0.peer_info()).await; + let id0 = pm0.cfg.node_id(); + let id1 = pm1.cfg.node_id(); + let id2 = pm2.cfg.node_id(); + let id3 = pm3.cfg.node_id(); + + pm0.wait_for_routing_table(&[ + (id1.clone(), vec![id1.clone()]), + (id3.clone(), vec![id3.clone()]), + (id2.clone(), vec![id1.clone(), id3.clone()]), + ]) + .await; + tracing::info!(target:"test","stop {id1}"); + drop(pm1); + tracing::info!(target:"test","wait for {id0} routing table"); + 
pm0.wait_for_routing_table(&[ + (id3.clone(), vec![id3.clone()]), + (id2.clone(), vec![id3.clone()]), + ]) + .await; + tracing::info!(target:"test","wait for {id2} routing table"); + pm2.wait_for_routing_table(&[ + (id3.clone(), vec![id3.clone()]), + (id0.clone(), vec![id3.clone()]), + ]) + .await; + tracing::info!(target:"test","wait for {id3} routing table"); + pm3.wait_for_routing_table(&[ + (id2.clone(), vec![id2.clone()]), + (id0.clone(), vec![id0.clone()]), + ]) + .await; + drop(pm0); + drop(pm2); + drop(pm3); +} diff --git a/chain/network/src/private_actix.rs b/chain/network/src/private_actix.rs index 91a800287dd..4b4aa41f1cc 100644 --- a/chain/network/src/private_actix.rs +++ b/chain/network/src/private_actix.rs @@ -2,7 +2,7 @@ /// They are not meant to be used outside. use crate::network_protocol::{Edge, PartialEdgeInfo, PeerInfo, PeerMessage}; use crate::peer_manager::connection; -use crate::types::{Ban, ReasonForBan}; +use crate::types::ReasonForBan; use near_primitives::network::PeerId; use std::collections::HashMap; use std::fmt; @@ -21,8 +21,6 @@ pub(crate) enum PeerToManagerMsg { RegisterPeer(RegisterPeer), PeersRequest(PeersRequest), PeersResponse(PeersResponse), - Unregister(Unregister), - Ban(Ban), RequestUpdateNonce(PeerId, PartialEdgeInfo), ResponseUpdateNonce(Edge), // PeerRequest diff --git a/chain/network/src/test_utils.rs b/chain/network/src/test_utils.rs index c713c6809a2..ef1d65f394c 100644 --- a/chain/network/src/test_utils.rs +++ b/chain/network/src/test_utils.rs @@ -278,7 +278,7 @@ impl Handler> for PeerManagerActor { ) -> Self::Result { let (_span, msg) = handler_debug_span!(target: "network", msg); debug!(target: "network", "Ban peer: {:?}", msg.peer_id); - self.try_ban_peer(&msg.peer_id, msg.ban_reason); + self.state.disconnect_and_ban(&self.clock, &msg.peer_id, msg.ban_reason); } } diff --git a/chain/network/src/types.rs b/chain/network/src/types.rs index b26567cacc0..d92ed26091d 100644 --- a/chain/network/src/types.rs +++ 
b/chain/network/src/types.rs @@ -164,22 +164,6 @@ pub enum PeerManagerMessageRequest { }, } -/// Messages from PeerManager to Peer -#[derive(actix::Message, Debug)] -#[rtype(result = "()")] -pub enum PeerManagerRequest { - BanPeer(ReasonForBan), - UnregisterPeer, -} - -/// Messages from PeerManager to Peer with a tracing Context. -#[derive(actix::Message, Debug)] -#[rtype(result = "()")] -pub struct PeerManagerRequestWithContext { - pub msg: PeerManagerRequest, - pub context: opentelemetry::Context, -} - impl PeerManagerMessageRequest { pub fn as_network_requests(self) -> NetworkRequests { if let PeerManagerMessageRequest::NetworkRequests(item) = self { diff --git a/integration-tests/src/tests/nearcore/node_cluster.rs b/integration-tests/src/tests/nearcore/node_cluster.rs index 7322744d15b..dae0d4dcfea 100644 --- a/integration-tests/src/tests/nearcore/node_cluster.rs +++ b/integration-tests/src/tests/nearcore/node_cluster.rs @@ -44,7 +44,7 @@ fn start_nodes( rpc_addrs.push(near_config.rpc_addr().unwrap().to_owned()); near_config.client_config.min_num_peers = (num_nodes as usize) - 1; if i > 0 { - near_config.network_config.boot_nodes = + near_config.network_config.peer_store.boot_nodes = convert_boot_nodes(vec![("near.0", first_node)]); } // if non validator, track all shards diff --git a/integration-tests/src/tests/nearcore/stake_nodes.rs b/integration-tests/src/tests/nearcore/stake_nodes.rs index f3a68665751..63aab2f56a3 100644 --- a/integration-tests/src/tests/nearcore/stake_nodes.rs +++ b/integration-tests/src/tests/nearcore/stake_nodes.rs @@ -67,7 +67,8 @@ fn init_test_staking( genesis.clone(), ); if i != 0 { - config.network_config.boot_nodes = convert_boot_nodes(vec![("near.0", first_node)]); + config.network_config.peer_store.boot_nodes = + convert_boot_nodes(vec![("near.0", first_node)]); } config.client_config.min_num_peers = num_node_seats as usize - 1; config.client_config.epoch_sync_enabled = false; diff --git 
a/integration-tests/src/tests/nearcore/sync_nodes.rs b/integration-tests/src/tests/nearcore/sync_nodes.rs index 3ba9a81d6d1..b04d3b0e0a3 100644 --- a/integration-tests/src/tests/nearcore/sync_nodes.rs +++ b/integration-tests/src/tests/nearcore/sync_nodes.rs @@ -105,11 +105,11 @@ fn setup_configs() -> (Genesis, Block, NearConfig, NearConfig) { let (port1, port2) = (open_port(), open_port()); let mut near1 = load_test_config("test1", port1, genesis.clone()); - near1.network_config.boot_nodes = convert_boot_nodes(vec![("test2", port2)]); + near1.network_config.peer_store.boot_nodes = convert_boot_nodes(vec![("test2", port2)]); near1.client_config.min_num_peers = 1; near1.client_config.epoch_sync_enabled = false; let mut near2 = load_test_config("test2", port2, genesis.clone()); - near2.network_config.boot_nodes = convert_boot_nodes(vec![("test1", port1)]); + near2.network_config.peer_store.boot_nodes = convert_boot_nodes(vec![("test1", port1)]); near2.client_config.min_num_peers = 1; near2.client_config.epoch_sync_enabled = false; (genesis, genesis_block, near1, near2) @@ -241,12 +241,12 @@ fn sync_state_stake_change() { let (port1, port2) = (open_port(), open_port()); let mut near1 = load_test_config("test1", port1, genesis.clone()); - near1.network_config.boot_nodes = convert_boot_nodes(vec![("test2", port2)]); + near1.network_config.peer_store.boot_nodes = convert_boot_nodes(vec![("test2", port2)]); near1.client_config.min_num_peers = 0; near1.client_config.min_block_production_delay = Duration::from_millis(200); near1.client_config.epoch_sync_enabled = false; let mut near2 = load_test_config("test2", port2, genesis.clone()); - near2.network_config.boot_nodes = convert_boot_nodes(vec![("test1", port1)]); + near2.network_config.peer_store.boot_nodes = convert_boot_nodes(vec![("test1", port1)]); near2.client_config.min_block_production_delay = Duration::from_millis(200); near2.client_config.min_num_peers = 1; near2.client_config.skip_sync_wait = false; diff --git 
a/integration-tests/src/tests/nearcore/sync_state_nodes.rs b/integration-tests/src/tests/nearcore/sync_state_nodes.rs index 61b1cca0378..2833a063275 100644 --- a/integration-tests/src/tests/nearcore/sync_state_nodes.rs +++ b/integration-tests/src/tests/nearcore/sync_state_nodes.rs @@ -24,7 +24,7 @@ fn sync_state_nodes() { let (port1, port2) = (open_port(), open_port()); let mut near1 = load_test_config("test1", port1, genesis.clone()); - near1.network_config.boot_nodes = convert_boot_nodes(vec![]); + near1.network_config.peer_store.boot_nodes = convert_boot_nodes(vec![]); near1.client_config.min_num_peers = 0; near1.client_config.epoch_sync_enabled = false; run_actix(async move { @@ -56,7 +56,7 @@ fn sync_state_nodes() { load_test_config("test2", port2, genesis2.clone()); near2.client_config.skip_sync_wait = false; near2.client_config.min_num_peers = 1; - near2.network_config.boot_nodes = + near2.network_config.peer_store.boot_nodes = convert_boot_nodes(vec![("test1", port1)]); near2.client_config.epoch_sync_enabled = false; @@ -134,7 +134,7 @@ fn sync_state_nodes_multishard() { let (port1, port2, port3, port4) = (open_port(), open_port(), open_port(), open_port()); let mut near1 = load_test_config("test1", port1, genesis.clone()); - near1.network_config.boot_nodes = + near1.network_config.peer_store.boot_nodes = convert_boot_nodes(vec![("test3", port3), ("test4", port4)]); near1.client_config.min_num_peers = 2; near1.client_config.min_block_production_delay = Duration::from_millis(200); @@ -142,7 +142,7 @@ fn sync_state_nodes_multishard() { near1.client_config.epoch_sync_enabled = false; let mut near3 = load_test_config("test3", port3, genesis.clone()); - near3.network_config.boot_nodes = + near3.network_config.peer_store.boot_nodes = convert_boot_nodes(vec![("test1", port1), ("test4", port4)]); near3.client_config.min_num_peers = 2; near3.client_config.min_block_production_delay = @@ -152,7 +152,7 @@ fn sync_state_nodes_multishard() { 
near3.client_config.epoch_sync_enabled = false; let mut near4 = load_test_config("test4", port4, genesis.clone()); - near4.network_config.boot_nodes = + near4.network_config.peer_store.boot_nodes = convert_boot_nodes(vec![("test1", port1), ("test3", port3)]); near4.client_config.min_num_peers = 2; near4.client_config.min_block_production_delay = @@ -198,11 +198,12 @@ fn sync_state_nodes_multishard() { Duration::from_millis(200); near2.client_config.max_block_production_delay = Duration::from_millis(400); - near2.network_config.boot_nodes = convert_boot_nodes(vec![ - ("test1", port1), - ("test3", port3), - ("test4", port4), - ]); + near2.network_config.peer_store.boot_nodes = + convert_boot_nodes(vec![ + ("test1", port1), + ("test3", port3), + ("test4", port4), + ]); near2.client_config.epoch_sync_enabled = false; let dir2 = tempfile::Builder::new() @@ -317,7 +318,7 @@ fn sync_empty_state() { if view_client2_holder2.is_none() { let mut near2 = load_test_config("test2", port2, genesis2); - near2.network_config.boot_nodes = + near2.network_config.peer_store.boot_nodes = convert_boot_nodes(vec![("test1", port1)]); near2.client_config.min_num_peers = 1; near2.client_config.min_block_production_delay = diff --git a/integration-tests/src/tests/network/peer_handshake.rs b/integration-tests/src/tests/network/peer_handshake.rs index 22ef6c0fbdb..50963138b51 100644 --- a/integration-tests/src/tests/network/peer_handshake.rs +++ b/integration-tests/src/tests/network/peer_handshake.rs @@ -26,7 +26,7 @@ fn make_peer_manager( peer_max_count: u32, ) -> actix::Addr { let mut config = config::NetworkConfig::from_seed(seed, port); - config.boot_nodes = convert_boot_nodes(boot_nodes); + config.peer_store.boot_nodes = convert_boot_nodes(boot_nodes); config.max_num_peers = peer_max_count; config.ideal_connections_hi = peer_max_count; config.ideal_connections_lo = peer_max_count; diff --git a/integration-tests/src/tests/network/routing.rs b/integration-tests/src/tests/network/routing.rs 
index cdaa5bfe262..c4a7d4f20ff 100644 --- a/integration-tests/src/tests/network/routing.rs +++ b/integration-tests/src/tests/network/routing.rs @@ -135,6 +135,7 @@ fn test_dont_drop_after_ttl() -> anyhow::Result<()> { runner.push(Action::AddEdge { from: 0, to: 1, force: true }); runner.push(Action::AddEdge { from: 1, to: 2, force: true }); runner.push(Action::CheckRoutingTable(0, vec![(1, vec![1]), (2, vec![1])])); + runner.push(Action::CheckRoutingTable(1, vec![(0, vec![0]), (2, vec![2])])); runner.push(Action::PingTo { source: 0, nonce: 0, target: 2 }); runner.push(Action::CheckPingPong(2, vec![Ping { nonce: 0, source: 0 }], vec![])); runner.push(Action::CheckPingPong(0, vec![], vec![Pong { nonce: 0, source: 2 }])); @@ -180,23 +181,6 @@ fn simple_remove() -> anyhow::Result<()> { start_test(runner) } -#[test] -fn square() -> anyhow::Result<()> { - let mut runner = Runner::new(4, 4); - - runner.push(Action::AddEdge { from: 0, to: 1, force: true }); - runner.push(Action::AddEdge { from: 1, to: 2, force: true }); - runner.push(Action::AddEdge { from: 2, to: 3, force: true }); - runner.push(Action::AddEdge { from: 3, to: 0, force: true }); - runner.push(Action::CheckRoutingTable(0, vec![(1, vec![1]), (3, vec![3]), (2, vec![1, 3])])); - runner.push(Action::Stop(1)); - runner.push(Action::CheckRoutingTable(0, vec![(3, vec![3]), (2, vec![3])])); - runner.push(Action::CheckRoutingTable(2, vec![(3, vec![3]), (0, vec![3])])); - runner.push(Action::CheckRoutingTable(3, vec![(2, vec![2]), (0, vec![0])])); - - start_test(runner) -} - #[test] fn blacklist_01() -> anyhow::Result<()> { let mut runner = Runner::new(2, 2).add_to_blacklist(0, Some(1)).use_boot_nodes(vec![0]); diff --git a/integration-tests/src/tests/network/runner.rs b/integration-tests/src/tests/network/runner.rs index 2e1edaf6ea5..be88980022a 100644 --- a/integration-tests/src/tests/network/runner.rs +++ b/integration-tests/src/tests/network/runner.rs @@ -542,14 +542,14 @@ impl Runner { 
config.whitelist.iter().map(|ix| self.test_config[*ix].peer_info()).collect(); let mut network_config = config::NetworkConfig::from_seed(&config.account_id, config.port); - network_config.ban_window = config.ban_window; + network_config.peer_store.ban_window = config.ban_window; network_config.max_num_peers = config.max_num_peers; network_config.ttl_account_id_router = time::Duration::seconds(5); network_config.routed_message_ttl = config.routed_message_ttl; - network_config.blacklist = blacklist; + network_config.peer_store.blacklist = blacklist; network_config.whitelist_nodes = whitelist; network_config.outbound_disabled = config.outbound_disabled; - network_config.boot_nodes = boot_nodes; + network_config.peer_store.boot_nodes = boot_nodes; network_config.archive = config.archive; let (send_events, recv_events) = broadcast::unbounded_channel(); network_config.event_sink = send_events.sink(); diff --git a/integration-tests/src/tests/network/stress_network.rs b/integration-tests/src/tests/network/stress_network.rs index 08ebb87daa5..cfb974c9db5 100644 --- a/integration-tests/src/tests/network/stress_network.rs +++ b/integration-tests/src/tests/network/stress_network.rs @@ -24,7 +24,7 @@ fn make_peer_manager( boot_nodes: Vec<(&str, u16)>, ) -> actix::Addr { let mut config = config::NetworkConfig::from_seed(seed, port); - config.boot_nodes = convert_boot_nodes(boot_nodes); + config.peer_store.boot_nodes = convert_boot_nodes(boot_nodes); PeerManagerActor::spawn( time::Clock::real(), near_store::db::TestDB::new(), diff --git a/nearcore/res/example-config-gc.json b/nearcore/res/example-config-gc.json index 420d7f61332..19d75ff2fac 100644 --- a/nearcore/res/example-config-gc.json +++ b/nearcore/res/example-config-gc.json @@ -43,10 +43,6 @@ "secs": 20, "nanos": 0 }, - "reconnect_delay": { - "secs": 60, - "nanos": 0 - }, "skip_sync_wait": false, "ban_window": { "secs": 10800, diff --git a/nearcore/res/example-config-no-gc.json b/nearcore/res/example-config-no-gc.json 
index 14745cf5a7b..cb1bfe9d523 100644 --- a/nearcore/res/example-config-no-gc.json +++ b/nearcore/res/example-config-no-gc.json @@ -43,10 +43,6 @@ "secs": 20, "nanos": 0 }, - "reconnect_delay": { - "secs": 60, - "nanos": 0 - }, "skip_sync_wait": false, "ban_window": { "secs": 10800, diff --git a/neard/src/cli.rs b/neard/src/cli.rs index 9bd806c227c..fb3d684b663 100644 --- a/neard/src/cli.rs +++ b/neard/src/cli.rs @@ -366,7 +366,7 @@ impl RunCmd { } if let Some(boot_nodes) = self.boot_nodes { if !boot_nodes.is_empty() { - near_config.network_config.boot_nodes = boot_nodes + near_config.network_config.peer_store.boot_nodes = boot_nodes .split(',') .map(|chunk| chunk.parse().expect("Failed to parse PeerInfo")) .collect(); diff --git a/tools/chainsync-loadtest/src/main.rs b/tools/chainsync-loadtest/src/main.rs index caee136068d..6dbaccb8ddf 100644 --- a/tools/chainsync-loadtest/src/main.rs +++ b/tools/chainsync-loadtest/src/main.rs @@ -97,7 +97,7 @@ impl Cmd { let near_config = download_configs(&cmd.chain_id, home_dir).context("Failed to initialize configs")?; - info!("#boot nodes = {}", near_config.network_config.boot_nodes.len()); + info!("#boot nodes = {}", near_config.network_config.peer_store.boot_nodes.len()); // Dropping Runtime is blocking, while futures should never be blocking. // Tokio has a runtime check which panics if you drop tokio Runtime from a future executed // on another Tokio runtime. 
From 8b8d366c84e9da71f4476307a12d02e4b43fda47 Mon Sep 17 00:00:00 2001 From: Jakob Meier Date: Wed, 26 Oct 2022 15:09:06 +0100 Subject: [PATCH 031/103] doc: parameter overview (#7934) * doc: parameter overview * grammar and a bit more details on sir vs not_sir * use consistent format to name functions --- docs/architecture/gas/README.md | 53 ++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 7 deletions(-) diff --git a/docs/architecture/gas/README.md b/docs/architecture/gas/README.md index 74af103b4d2..1ca8e9e38a4 100644 --- a/docs/architecture/gas/README.md +++ b/docs/architecture/gas/README.md @@ -21,11 +21,50 @@ there to estimate the safe values of the new parameters. This section is for you if you are adding new features such as a new pre-compiled method or other host functions. +Here is a high-level summary of what types of parameters exist. - - - - - - - \ No newline at end of file +## Action Costs + +Actions are executed in two steps. First, an action is verified and inserted into +an action receipt, which is sent to the receiver of the action. The `send` fee +is paid for this. It is charged either in `fn process_transaction(..)` if the +action is part of a fresh transaction, or inside +[logic.rs](https://github.com/near/nearcore/blob/14b8ae2c7465444c9b672a23b044c00be98f6e34/runtime/near-vm-logic/src/logic.rs) +through `fn pay_action_base(..)` if the action is generated by a function call. +The send fee is meant to cover the cost to validate an action and transmit it +over the network. It can be different for local receipts (sender = receiver). + +The second step is action execution. It is charged in `fn apply_action(..)`. +The execution cost has to cover everything required to apply the action to the +blockchain's state. + +In conclusion, each action parameter is split into three costs, `send_sir`, +`send_not_sir`, and `execution`.
(`sir` = "sender is receiver") Local receipts +charge the first and last parameters, remote receipts charge the second and +third. They should be estimated, defined, and charged separately. But the +reality is that today almost all actions are estimated as a whole and the +parameters are split 50/50 between send and execution cost, without +discrimination on local vs remote receipts i.e. `send_sir` == `send_not_sir`. + + +## WASM Costs + +Costs that occur while executing a function call on a deployed WASM app (a.k.a. +smart contract) are charged only at the receiver. Thus, they have only one value +to define them, in contrast to action costs. + +## Non-gas parameters + +Not all runtime parameters are directly related to gas costs. Here is a brief +overview. + +- **Gas economics config**: Defines the conversion rate when purchasing gas with + NEAR tokens and how gas rewards are split. +- **Storage usage config**: Costs in tokens, not gas, for storing data on chain. +- **Account creation config**: Rules for account creation. +- **Smart contract limits**: Rules for WASM execution. + +None of the above define any gas costs directly. But there can be interplay +between those parameters and gas costs. For example, the limits on smart +contracts changes the assumptions for how slow a contract compilation could be, +hence it affects the deploy action costs. \ No newline at end of file From df347d5ae9a035f78117ca2926e5295319513680 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Wed, 26 Oct 2022 15:56:04 +0100 Subject: [PATCH 032/103] o11y: introduce pretty::Slice for formatting slices of arbitrary types (#7935) This replaces logging::pretty_vec from near-primitives-core crate which was the last remaining function in that module. 
--- core/o11y/src/pretty.rs | 52 +++++++++++++++++++++++++++++ core/primitives-core/src/lib.rs | 1 - core/primitives-core/src/logging.rs | 16 --------- core/primitives/src/lib.rs | 1 - core/primitives/src/transaction.rs | 5 ++- core/primitives/src/views.rs | 6 +--- 6 files changed, 55 insertions(+), 26 deletions(-) delete mode 100644 core/primitives-core/src/logging.rs diff --git a/core/o11y/src/pretty.rs b/core/o11y/src/pretty.rs index e063038bc30..697e6eeb138 100644 --- a/core/o11y/src/pretty.rs +++ b/core/o11y/src/pretty.rs @@ -101,6 +101,40 @@ impl<'a> std::fmt::Display for StorageKey<'a> { } } +/// A wrapper for slices which formats the slice limiting the length. +/// +/// If the slice has no more than five elements, it’s printed in full. +/// Otherwise, only the first two and last two elements are printed to limit the +/// length of the formatted value. +pub struct Slice<'a, T>(pub &'a [T]); + +impl<'a, T: std::fmt::Debug> std::fmt::Debug for Slice<'a, T> { + fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let slice = self.0; + let len = slice.len(); + if len <= 5 { + return std::fmt::Debug::fmt(&slice, fmt); + } + + struct Ellipsis; + + impl std::fmt::Debug for Ellipsis { + fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fmt.write_str("…") + } + } + + write!(fmt, "({len})")?; + fmt.debug_list() + .entry(&slice[0]) + .entry(&slice[1]) + .entry(&Ellipsis) + .entry(&slice[len - 2]) + .entry(&slice[len - 1]) + .finish() + } +} + /// Implementation of [`Bytes`] and [`StorageKey`] formatting. /// /// If the `consider_hash` argument is false, formats bytes as described in @@ -212,3 +246,21 @@ fn test_truncated_bytes() { fn test_storage_key() { do_test_bytes_formatting!(StorageKey, true, false); } + +#[test] +fn test_slice() { + macro_rules! 
test { + ($want:literal, $fmt:literal, $len:expr) => { + assert_eq!( + $want, + format!($fmt, Slice(&[0u8, 11, 22, 33, 44, 55, 66, 77, 88, 99][..$len])) + ) + }; + } + + test!("[]", "{:?}", 0); + test!("[0, 11, 22, 33]", "{:?}", 4); + test!("[0, b, 16, 21]", "{:x?}", 4); + test!("(10)[0, 11, …, 88, 99]", "{:?}", 10); + test!("(10)[0, b, …, 58, 63]", "{:x?}", 10); +} diff --git a/core/primitives-core/src/lib.rs b/core/primitives-core/src/lib.rs index 3c3b9ab168f..8b24882a6a8 100644 --- a/core/primitives-core/src/lib.rs +++ b/core/primitives-core/src/lib.rs @@ -5,7 +5,6 @@ pub mod account; pub mod config; pub mod contract; pub mod hash; -pub mod logging; pub mod parameter; pub mod profile; pub mod runtime; diff --git a/core/primitives-core/src/logging.rs b/core/primitives-core/src/logging.rs deleted file mode 100644 index 93bdfad1f38..00000000000 --- a/core/primitives-core/src/logging.rs +++ /dev/null @@ -1,16 +0,0 @@ -use std::fmt::Debug; - -pub fn pretty_vec(buf: &[T]) -> String { - if buf.len() <= 5 { - format!("{:#?}", buf) - } else { - format!( - "({})[{:#?}, {:#?}, … {:#?}, {:#?}]", - buf.len(), - buf[0], - buf[1], - buf[buf.len() - 2], - buf[buf.len() - 1] - ) - } -} diff --git a/core/primitives/src/lib.rs b/core/primitives/src/lib.rs index 710d394dd95..8da63da6372 100644 --- a/core/primitives/src/lib.rs +++ b/core/primitives/src/lib.rs @@ -3,7 +3,6 @@ pub use near_primitives_core::borsh; pub use near_primitives_core::config; pub use near_primitives_core::contract; pub use near_primitives_core::hash; -pub use near_primitives_core::logging; pub use near_primitives_core::num_rational; pub use near_primitives_core::profile; pub use near_primitives_core::serialize; diff --git a/core/primitives/src/transaction.rs b/core/primitives/src/transaction.rs index 05b4be3098b..46f82880ef4 100644 --- a/core/primitives/src/transaction.rs +++ b/core/primitives/src/transaction.rs @@ -12,7 +12,6 @@ use near_primitives_core::profile::ProfileData; use crate::account::AccessKey; 
use crate::errors::TxExecutionError; use crate::hash::{hash, CryptoHash}; -use crate::logging; use crate::merkle::MerklePath; use crate::serialize::{base64_format, dec_format}; use crate::types::{AccountId, Balance, Gas, Nonce}; @@ -383,8 +382,8 @@ impl Default for ExecutionMetadata { impl fmt::Debug for ExecutionOutcome { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("ExecutionOutcome") - .field("logs", &format_args!("{}", logging::pretty_vec(&self.logs))) - .field("receipt_ids", &format_args!("{}", logging::pretty_vec(&self.receipt_ids))) + .field("logs", &pretty::Slice(&self.logs)) + .field("receipt_ids", &pretty::Slice(&self.receipt_ids)) .field("burnt_gas", &self.gas_burnt) .field("tokens_burnt", &self.tokens_burnt) .field("status", &self.status) diff --git a/core/primitives/src/views.rs b/core/primitives/src/views.rs index 07e3eb0767a..ec6f7865f00 100644 --- a/core/primitives/src/views.rs +++ b/core/primitives/src/views.rs @@ -24,7 +24,6 @@ use crate::challenge::{Challenge, ChallengesResult}; use crate::contract::ContractCode; use crate::errors::TxExecutionError; use crate::hash::{hash, CryptoHash}; -use crate::logging; use crate::merkle::{combine_hash, MerklePath}; use crate::network::PeerId; use crate::profile::Cost; @@ -1376,10 +1375,7 @@ impl fmt::Debug for FinalExecutionOutcomeView { .field("status", &self.status) .field("transaction", &self.transaction) .field("transaction_outcome", &self.transaction_outcome) - .field( - "receipts_outcome", - &format_args!("{}", logging::pretty_vec(&self.receipts_outcome)), - ) + .field("receipts_outcome", &pretty::Slice(&self.receipts_outcome)) .finish() } } From 1f075abfd9de9d3af3241c7240dd94dc354af431 Mon Sep 17 00:00:00 2001 From: pompon0 Date: Wed, 26 Oct 2022 18:47:47 +0200 Subject: [PATCH 033/103] split Network(View)ClientMessage enum into separate actix messages. 
(#7895) I've split Network(View)ClientMessage into separate messages so that the handlers can have separate result types (stronger typing). --- chain/client/src/adapter.rs | 367 ++++----- chain/client/src/client.rs | 30 +- chain/client/src/client_actor.rs | 727 ++++++++++-------- chain/client/src/lib.rs | 3 + chain/client/src/test_utils.rs | 143 ++-- chain/client/src/tests/bug_repros.rs | 36 +- chain/client/src/tests/catching_up.rs | 4 +- chain/client/src/tests/consensus.rs | 19 +- chain/client/src/tests/cross_shard_tx.rs | 8 +- chain/client/src/tests/query_client.rs | 52 +- chain/client/src/view_client.rs | 510 +++++++----- chain/jsonrpc/src/lib.rs | 119 ++- chain/rosetta-rpc/src/lib.rs | 7 +- .../src/tests/client/chunks_management.rs | 9 +- .../access_key_nonce_for_implicit_accounts.rs | 17 +- .../account_id_in_function_call_permission.rs | 8 +- .../src/tests/client/process_blocks.rs | 103 +-- .../src/tests/nearcore/stake_nodes.rs | 11 +- .../src/tests/nearcore/sync_nodes.rs | 13 +- tools/mirror/src/lib.rs | 8 +- tools/mock-node/src/lib.rs | 102 +-- tools/mock-node/src/setup.rs | 16 +- tools/state-viewer/src/apply_chunk.rs | 4 +- 23 files changed, 1190 insertions(+), 1126 deletions(-) diff --git a/chain/client/src/adapter.rs b/chain/client/src/adapter.rs index d53a4d371dd..5a5836725b9 100644 --- a/chain/client/src/adapter.rs +++ b/chain/client/src/adapter.rs @@ -16,54 +16,113 @@ use near_primitives::transaction::SignedTransaction; use near_primitives::types::{AccountId, EpochId, ShardId}; use near_primitives::views::FinalExecutionOutcomeView; -#[derive(actix::Message, Debug, strum::AsRefStr, strum::IntoStaticStr)] -// TODO(#1313): Use Box -#[allow(clippy::large_enum_variant)] -#[rtype(result = "NetworkClientResponses")] -pub enum NetworkClientMessages { - #[cfg(feature = "test_features")] - Adversarial(near_network::types::NetworkAdversarialMessage), - - /// Received transaction. 
- Transaction { - transaction: SignedTransaction, - /// Whether the transaction is forwarded from other nodes. - is_forwarded: bool, - /// Whether the transaction needs to be submitted. - check_only: bool, - }, - /// Received block, possibly requested. - Block(Block, PeerId, bool), - /// Received list of headers for syncing. - BlockHeaders(Vec, PeerId), - /// Block approval. - BlockApproval(Approval, PeerId), - /// State response. - StateResponse(StateResponseInfo), - - /// Request chunk parts and/or receipts. - PartialEncodedChunkRequest(PartialEncodedChunkRequestMsg, CryptoHash), - /// Response to a request for chunk parts and/or receipts. - PartialEncodedChunkResponse(PartialEncodedChunkResponseMsg, std::time::Instant), - /// Information about chunk such as its header, some subset of parts and/or incoming receipts - PartialEncodedChunk(PartialEncodedChunk), - /// Forwarding parts to those tracking the shard (so they don't need to send requests) - PartialEncodedChunkForward(PartialEncodedChunkForwardMsg), - - /// A challenge to invalidate the block. - Challenge(Challenge), - - NetworkInfo(NetworkInfo), +/// Transaction status query +#[derive(actix::Message)] +#[rtype(result = "Option>")] +pub(crate) struct TxStatusRequest { + pub tx_hash: CryptoHash, + pub signer_account_id: AccountId, } -// TODO(#1313): Use Box -#[derive(Eq, PartialEq, Debug, actix::MessageResponse)] -#[allow(clippy::large_enum_variant)] -pub enum NetworkClientResponses { - /// Adv controls. - #[cfg(feature = "test_features")] - AdvResult(u64), +/// Transaction status response +#[derive(actix::Message)] +#[rtype(result = "()")] +pub(crate) struct TxStatusResponse(pub Box); +/// Request a block. +#[derive(actix::Message)] +#[rtype(result = "Option>")] +pub(crate) struct BlockRequest(pub CryptoHash); + +/// Block response. 
+#[derive(actix::Message, Debug)] +#[rtype(result = "()")] +pub struct BlockResponse { + pub block: Block, + pub peer_id: PeerId, + pub was_requested: bool, +} + +#[derive(actix::Message, Debug)] +#[rtype(result = "()")] +pub struct BlockApproval(pub Approval, pub PeerId); + +/// Request headers. +#[derive(actix::Message)] +#[rtype(result = "Option>")] +pub(crate) struct BlockHeadersRequest(pub Vec); + +/// Headers response. +#[derive(actix::Message, Debug)] +#[rtype(result = "Result<(),ReasonForBan>")] +pub(crate) struct BlockHeadersResponse(pub Vec, pub PeerId); + +/// State request header. +#[derive(actix::Message)] +#[rtype(result = "Option")] +pub(crate) struct StateRequestHeader { + pub shard_id: ShardId, + pub sync_hash: CryptoHash, +} + +/// State request part. +#[derive(actix::Message)] +#[rtype(result = "Option")] +pub(crate) struct StateRequestPart { + pub shard_id: ShardId, + pub sync_hash: CryptoHash, + pub part_id: u64, +} + +/// Response to state request. +#[derive(actix::Message, Debug)] +#[rtype(result = "()")] +pub(crate) struct StateResponse(pub Box); + +/// Account announcements that needs to be validated before being processed. +/// They are paired with last epoch id known to this announcement, in order to accept only +/// newer announcements. 
+#[derive(actix::Message)] +#[rtype(result = "Result,ReasonForBan>")] +pub(crate) struct AnnounceAccountRequest(pub Vec<(AnnounceAccount, Option)>); + +#[derive(actix::Message, Debug)] +#[rtype(result = "()")] +pub struct SetNetworkInfo(pub NetworkInfo); + +#[derive(actix::Message, Debug)] +#[rtype(result = "()")] +pub(crate) struct RecvChallenge(pub Challenge); + +#[derive(actix::Message, Debug)] +#[rtype(result = "()")] +pub(crate) struct RecvPartialEncodedChunkForward(pub PartialEncodedChunkForwardMsg); + +#[derive(actix::Message, Debug)] +#[rtype(result = "()")] +pub(crate) struct RecvPartialEncodedChunk(pub PartialEncodedChunk); + +#[derive(actix::Message, Debug)] +#[rtype(result = "()")] +pub(crate) struct RecvPartialEncodedChunkResponse( + pub PartialEncodedChunkResponseMsg, + pub std::time::Instant, +); + +#[derive(actix::Message, Debug)] +#[rtype(result = "()")] +pub(crate) struct RecvPartialEncodedChunkRequest(pub PartialEncodedChunkRequestMsg, pub CryptoHash); + +#[derive(actix::Message, Debug)] +#[rtype(result = "ProcessTxResponse")] +pub struct ProcessTxRequest { + pub transaction: SignedTransaction, + pub is_forwarded: bool, + pub check_only: bool, +} + +#[derive(actix::MessageResponse, Debug, PartialEq, Eq)] +pub enum ProcessTxResponse { /// No response. NoResponse, /// Valid transaction inserted into mempool as response to Transaction. @@ -75,50 +134,6 @@ pub enum NetworkClientResponses { /// The node being queried does not track the shard needed and therefore cannot provide userful /// response. DoesNotTrackShard, - /// Ban peer for malicious behavior. 
- Ban { ban_reason: ReasonForBan }, -} - -#[derive(actix::Message, strum::IntoStaticStr)] -#[rtype(result = "NetworkViewClientResponses")] -pub enum NetworkViewClientMessages { - #[cfg(feature = "test_features")] - Adversarial(near_network::types::NetworkAdversarialMessage), - - /// Transaction status query - TxStatus { tx_hash: CryptoHash, signer_account_id: AccountId }, - /// Transaction status response - TxStatusResponse(Box), - /// Request a block. - BlockRequest(CryptoHash), - /// Request headers. - BlockHeadersRequest(Vec), - /// State request header. - StateRequestHeader { shard_id: ShardId, sync_hash: CryptoHash }, - /// State request part. - StateRequestPart { shard_id: ShardId, sync_hash: CryptoHash, part_id: u64 }, - /// Account announcements that needs to be validated before being processed. - /// They are paired with last epoch id known to this announcement, in order to accept only - /// newer announcements. - AnnounceAccount(Vec<(AnnounceAccount, Option)>), -} - -#[derive(Debug, actix::MessageResponse)] -pub enum NetworkViewClientResponses { - /// Transaction execution outcome - TxStatus(Box), - /// Block response. - Block(Box), - /// Headers response. - BlockHeaders(Vec), - /// Response to state request. - StateResponse(Box), - /// Valid announce accounts. - AnnounceAccount(Vec), - /// Ban peer for malicious behavior. 
- Ban { ban_reason: ReasonForBan }, - /// Response not needed - NoResponse, } pub struct Adapter { @@ -147,17 +162,12 @@ impl near_network::client::Client for Adapter { match self .view_client_addr .send( - NetworkViewClientMessages::TxStatus { - tx_hash: tx_hash, - signer_account_id: account_id, - } - .with_span_context(), + TxStatusRequest { tx_hash: tx_hash, signer_account_id: account_id } + .with_span_context(), ) .await { - Ok(NetworkViewClientResponses::TxStatus(tx_result)) => Some(tx_result), - Ok(NetworkViewClientResponses::NoResponse) => None, - Ok(resp) => panic!("unexpected ViewClientResponse: {resp:?}"), + Ok(res) => res, Err(err) => { tracing::error!("mailbox error: {err}"); None @@ -168,15 +178,13 @@ impl near_network::client::Client for Adapter { async fn tx_status_response(&self, tx_result: FinalExecutionOutcomeView) { match self .view_client_addr - .send( - NetworkViewClientMessages::TxStatusResponse(Box::new(tx_result.clone())) - .with_span_context(), - ) + .send(TxStatusResponse(Box::new(tx_result.clone())).with_span_context()) .await { - Ok(NetworkViewClientResponses::NoResponse) => {} - Ok(resp) => panic!("unexpected ViewClientResponse: {resp:?}"), - Err(err) => tracing::error!("mailbox error: {err}"), + Ok(()) => {} + Err(err) => { + tracing::error!("mailbox error: {err}"); + } } } @@ -188,18 +196,12 @@ impl near_network::client::Client for Adapter { match self .view_client_addr .send( - NetworkViewClientMessages::StateRequestHeader { - shard_id: shard_id, - sync_hash: sync_hash, - } - .with_span_context(), + StateRequestHeader { shard_id: shard_id, sync_hash: sync_hash }.with_span_context(), ) .await { - Ok(NetworkViewClientResponses::StateResponse(resp)) => Ok(Some(*resp)), - Ok(NetworkViewClientResponses::NoResponse) => Ok(None), - Ok(NetworkViewClientResponses::Ban { ban_reason }) => Err(ban_reason), - Ok(resp) => panic!("unexpected ViewClientResponse: {resp:?}"), + Ok(Some(StateResponse(resp))) => Ok(Some(*resp)), + Ok(None) => Ok(None), 
Err(err) => { tracing::error!("mailbox error: {err}"); Ok(None) @@ -216,19 +218,13 @@ impl near_network::client::Client for Adapter { match self .view_client_addr .send( - NetworkViewClientMessages::StateRequestPart { - shard_id: shard_id, - sync_hash: sync_hash, - part_id: part_id, - } - .with_span_context(), + StateRequestPart { shard_id: shard_id, sync_hash: sync_hash, part_id: part_id } + .with_span_context(), ) .await { - Ok(NetworkViewClientResponses::StateResponse(resp)) => Ok(Some(*resp)), - Ok(NetworkViewClientResponses::NoResponse) => Ok(None), - Ok(NetworkViewClientResponses::Ban { ban_reason }) => Err(ban_reason), - Ok(resp) => panic!("unexpected ViewClientResponse: {resp:?}"), + Ok(Some(StateResponse(resp))) => Ok(Some(*resp)), + Ok(None) => Ok(None), Err(err) => { tracing::error!("mailbox error: {err}"); Ok(None) @@ -237,25 +233,15 @@ impl near_network::client::Client for Adapter { } async fn state_response(&self, info: StateResponseInfo) { - match self - .client_addr - .send(NetworkClientMessages::StateResponse(info).with_span_context()) - .await - { - Ok(NetworkClientResponses::NoResponse) => {} - Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), + match self.client_addr.send(StateResponse(Box::new(info)).with_span_context()).await { + Ok(()) => {} Err(err) => tracing::error!("mailbox error: {err}"), } } async fn block_approval(&self, approval: Approval, peer_id: PeerId) { - match self - .client_addr - .send(NetworkClientMessages::BlockApproval(approval, peer_id).with_span_context()) - .await - { - Ok(NetworkClientResponses::NoResponse) => {} - Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), + match self.client_addr.send(BlockApproval(approval, peer_id).with_span_context()).await { + Ok(()) => {} Err(err) => tracing::error!("mailbox error: {err}"), } } @@ -264,19 +250,19 @@ impl near_network::client::Client for Adapter { match self .client_addr .send( - NetworkClientMessages::Transaction { transaction, is_forwarded, check_only: 
false } + ProcessTxRequest { transaction, is_forwarded, check_only: false } .with_span_context(), ) .await { - // Almost all variants of NetworkClientResponse are used only in response - // to NetworkClientMessages::Transaction (except for Ban). It will be clearer - // once NetworkClientMessage is split into separate requests. - Ok(resp @ NetworkClientResponses::Ban { .. }) => { - panic!("unexpected ClientResponse: {resp:?}") + Ok(ProcessTxResponse::InvalidTx(err)) => { + tracing::warn!(target: "network", ?err, "Received invalid tx"); + // TODO: count as malicious behavior? } Ok(_) => {} - Err(err) => tracing::error!("mailbox error: {err}"), + Err(err) => { + tracing::error!("mailbox error: {err}"); + } } } @@ -287,14 +273,10 @@ impl near_network::client::Client for Adapter { ) { match self .client_addr - .send( - NetworkClientMessages::PartialEncodedChunkRequest(req, msg_hash) - .with_span_context(), - ) + .send(RecvPartialEncodedChunkRequest(req, msg_hash).with_span_context()) .await { - Ok(NetworkClientResponses::NoResponse) => {} - Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), + Ok(()) => {} Err(err) => tracing::error!("mailbox error: {err}"), } } @@ -306,51 +288,31 @@ impl near_network::client::Client for Adapter { ) { match self .client_addr - .send( - NetworkClientMessages::PartialEncodedChunkResponse(resp, timestamp.into()) - .with_span_context(), - ) + .send(RecvPartialEncodedChunkResponse(resp, timestamp.into()).with_span_context()) .await { - Ok(NetworkClientResponses::NoResponse) => {} - Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), + Ok(()) => {} Err(err) => tracing::error!("mailbox error: {err}"), } } async fn partial_encoded_chunk(&self, chunk: PartialEncodedChunk) { - match self - .client_addr - .send(NetworkClientMessages::PartialEncodedChunk(chunk).with_span_context()) - .await - { - Ok(NetworkClientResponses::NoResponse) => {} - Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), + match 
self.client_addr.send(RecvPartialEncodedChunk(chunk).with_span_context()).await { + Ok(()) => {} Err(err) => tracing::error!("mailbox error: {err}"), } } async fn partial_encoded_chunk_forward(&self, msg: PartialEncodedChunkForwardMsg) { - match self - .client_addr - .send(NetworkClientMessages::PartialEncodedChunkForward(msg).with_span_context()) - .await - { - Ok(NetworkClientResponses::NoResponse) => {} - Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), + match self.client_addr.send(RecvPartialEncodedChunkForward(msg).with_span_context()).await { + Ok(()) => {} Err(err) => tracing::error!("mailbox error: {err}"), } } async fn block_request(&self, hash: CryptoHash) -> Option> { - match self - .view_client_addr - .send(NetworkViewClientMessages::BlockRequest(hash).with_span_context()) - .await - { - Ok(NetworkViewClientResponses::Block(block)) => Some(block), - Ok(NetworkViewClientResponses::NoResponse) => None, - Ok(resp) => panic!("unexpected ViewClientResponse: {resp:?}"), + match self.view_client_addr.send(BlockRequest(hash).with_span_context()).await { + Ok(res) => res, Err(err) => { tracing::error!("mailbox error: {err}"); None @@ -359,14 +321,8 @@ impl near_network::client::Client for Adapter { } async fn block_headers_request(&self, hashes: Vec) -> Option> { - match self - .view_client_addr - .send(NetworkViewClientMessages::BlockHeadersRequest(hashes).with_span_context()) - .await - { - Ok(NetworkViewClientResponses::BlockHeaders(block_headers)) => Some(block_headers), - Ok(NetworkViewClientResponses::NoResponse) => None, - Ok(resp) => panic!("unexpected ViewClientResponse: {resp:?}"), + match self.view_client_addr.send(BlockHeadersRequest(hashes).with_span_context()).await { + Ok(headers) => headers, Err(err) => { tracing::error!("mailbox error: {err}"); None @@ -377,11 +333,10 @@ impl near_network::client::Client for Adapter { async fn block(&self, block: Block, peer_id: PeerId, was_requested: bool) { match self .client_addr - 
.send(NetworkClientMessages::Block(block, peer_id, was_requested).with_span_context()) + .send(BlockResponse { block, peer_id, was_requested }.with_span_context()) .await { - Ok(NetworkClientResponses::NoResponse) => {} - Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), + Ok(()) => {} Err(err) => tracing::error!("mailbox error: {err}"), } } @@ -393,12 +348,10 @@ impl near_network::client::Client for Adapter { ) -> Result<(), ReasonForBan> { match self .client_addr - .send(NetworkClientMessages::BlockHeaders(headers, peer_id).with_span_context()) + .send(BlockHeadersResponse(headers, peer_id).with_span_context()) .await { - Ok(NetworkClientResponses::NoResponse) => Ok(()), - Ok(NetworkClientResponses::Ban { ban_reason }) => Err(ban_reason), - Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), + Ok(res) => res, Err(err) => { tracing::error!("mailbox error: {err}"); Ok(()) @@ -407,25 +360,15 @@ impl near_network::client::Client for Adapter { } async fn challenge(&self, challenge: Challenge) { - match self - .client_addr - .send(NetworkClientMessages::Challenge(challenge).with_span_context()) - .await - { - Ok(NetworkClientResponses::NoResponse) => {} - Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), + match self.client_addr.send(RecvChallenge(challenge).with_span_context()).await { + Ok(()) => {} Err(err) => tracing::error!("mailbox error: {err}"), } } async fn network_info(&self, info: NetworkInfo) { - match self - .client_addr - .send(NetworkClientMessages::NetworkInfo(info).with_span_context()) - .await - { - Ok(NetworkClientResponses::NoResponse) => {} - Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), + match self.client_addr.send(SetNetworkInfo(info).with_span_context()).await { + Ok(()) => {} Err(err) => tracing::error!("mailbox error: {err}"), } } @@ -434,15 +377,9 @@ impl near_network::client::Client for Adapter { &self, accounts: Vec<(AnnounceAccount, Option)>, ) -> Result, ReasonForBan> { - match self - 
.view_client_addr - .send(NetworkViewClientMessages::AnnounceAccount(accounts).with_span_context()) - .await + match self.view_client_addr.send(AnnounceAccountRequest(accounts).with_span_context()).await { - Ok(NetworkViewClientResponses::AnnounceAccount(accounts)) => Ok(accounts), - Ok(NetworkViewClientResponses::NoResponse) => Ok(vec![]), - Ok(NetworkViewClientResponses::Ban { ban_reason }) => Err(ban_reason), - Ok(resp) => panic!("unexpected ClientResponse: {resp:?}"), + Ok(res) => res, Err(err) => { tracing::error!("mailbox error: {err}"); Ok(vec![]) diff --git a/chain/client/src/client.rs b/chain/client/src/client.rs index 39f278803df..cefc6a0077b 100644 --- a/chain/client/src/client.rs +++ b/chain/client/src/client.rs @@ -14,7 +14,6 @@ use near_client_primitives::debug::ChunkProduction; use near_primitives::time::Clock; use tracing::{debug, error, info, trace, warn}; -use crate::adapter::NetworkClientResponses; use near_chain::chain::{ ApplyStatePartsRequest, BlockCatchUpRequest, BlockMissingChunks, BlocksCatchUpState, OrphanMissingChunks, StateSplitRequest, TX_ROUTING_HEIGHT_HORIZON, @@ -44,6 +43,7 @@ use near_primitives::unwrap_or_return; use near_primitives::utils::MaybeValidated; use near_primitives::validator_signer::ValidatorSigner; +use crate::adapter::ProcessTxResponse; use crate::debug::BlockProductionTracker; use crate::debug::PRODUCTION_TIMES_CACHE_SIZE; use crate::sync::{BlockSync, EpochSync, HeaderSync, StateSync, StateSyncResult}; @@ -1717,11 +1717,11 @@ impl Client { tx: SignedTransaction, is_forwarded: bool, check_only: bool, - ) -> NetworkClientResponses { + ) -> ProcessTxResponse { unwrap_or_return!(self.process_tx_internal(&tx, is_forwarded, check_only), { let me = self.validator_signer.as_ref().map(|vs| vs.validator_id()); warn!(target: "client", "I'm: {:?} Dropping tx: {:?}", me, tx); - NetworkClientResponses::NoResponse + ProcessTxResponse::NoResponse }) } @@ -1763,7 +1763,7 @@ impl Client { tx: &SignedTransaction, is_forwarded: bool, 
check_only: bool, - ) -> Result { + ) -> Result { let head = self.chain.head()?; let me = self.validator_signer.as_ref().map(|vs| vs.validator_id()); let cur_block_header = self.chain.head_header()?; @@ -1777,7 +1777,7 @@ impl Client { transaction_validity_period, ) { debug!(target: "client", "Invalid tx: expired or from a different fork -- {:?}", tx); - return Ok(NetworkClientResponses::InvalidTx(e)); + return Ok(ProcessTxResponse::InvalidTx(e)); } let gas_price = cur_block_header.gas_price(); let epoch_id = self.runtime_adapter.get_epoch_id_from_prev_block(&head.last_block_hash)?; @@ -1790,7 +1790,7 @@ impl Client { .expect("no storage errors") { debug!(target: "client", "Invalid tx during basic validation: {:?}", err); - return Ok(NetworkClientResponses::InvalidTx(err)); + return Ok(ProcessTxResponse::InvalidTx(err)); } let shard_id = @@ -1808,7 +1808,7 @@ impl Client { return Err(Error::Other("Node has not caught up yet".to_string())); } else { self.forward_tx(&epoch_id, tx)?; - return Ok(NetworkClientResponses::RequestRouted); + return Ok(ProcessTxResponse::RequestRouted); } } }; @@ -1818,9 +1818,9 @@ impl Client { .expect("no storage errors") { debug!(target: "client", "Invalid tx: {:?}", err); - Ok(NetworkClientResponses::InvalidTx(err)) + Ok(ProcessTxResponse::InvalidTx(err)) } else if check_only { - Ok(NetworkClientResponses::ValidTx) + Ok(ProcessTxResponse::ValidTx) } else { let active_validator = self.active_validator(shard_id)?; @@ -1842,30 +1842,30 @@ impl Client { if !is_forwarded { self.possibly_forward_tx_to_next_epoch(tx)?; } - Ok(NetworkClientResponses::ValidTx) + Ok(ProcessTxResponse::ValidTx) } else if !is_forwarded { trace!(target: "client", shard_id, "Forwarding a transaction."); metrics::TRANSACTION_RECEIVED_NON_VALIDATOR.inc(); self.forward_tx(&epoch_id, tx)?; - Ok(NetworkClientResponses::RequestRouted) + Ok(ProcessTxResponse::RequestRouted) } else { trace!(target: "client", shard_id, "Non-validator received a forwarded transaction, dropping 
it."); metrics::TRANSACTION_RECEIVED_NON_VALIDATOR_FORWARDED.inc(); - Ok(NetworkClientResponses::NoResponse) + Ok(ProcessTxResponse::NoResponse) } } } else if check_only { - Ok(NetworkClientResponses::DoesNotTrackShard) + Ok(ProcessTxResponse::DoesNotTrackShard) } else { if is_forwarded { // received forwarded transaction but we are not tracking the shard debug!(target: "client", "Received forwarded transaction but no tracking shard {}, I'm {:?}", shard_id, me); - return Ok(NetworkClientResponses::NoResponse); + return Ok(ProcessTxResponse::NoResponse); } // We are not tracking this shard, so there is no way to validate this tx. Just rerouting. self.forward_tx(&epoch_id, tx)?; - Ok(NetworkClientResponses::RequestRouted) + Ok(ProcessTxResponse::RequestRouted) } } diff --git a/chain/client/src/client_actor.rs b/chain/client/src/client_actor.rs index 07bbfd7db01..9819310e856 100644 --- a/chain/client/src/client_actor.rs +++ b/chain/client/src/client_actor.rs @@ -5,7 +5,11 @@ //! Unfortunately, this is not the case today. We are in the process of refactoring ClientActor //! 
https://github.com/near/nearcore/issues/7899 -use crate::adapter::{NetworkClientMessages, NetworkClientResponses}; +use crate::adapter::{ + BlockApproval, BlockHeadersResponse, BlockResponse, ProcessTxRequest, ProcessTxResponse, + RecvChallenge, RecvPartialEncodedChunk, RecvPartialEncodedChunkForward, + RecvPartialEncodedChunkRequest, RecvPartialEncodedChunkResponse, SetNetworkInfo, StateResponse, +}; use crate::client::{Client, EPOCH_START_INFO_BLOCKS}; use crate::info::{ display_sync_status, get_validator_epoch_stats, InfoHelper, ValidatorInfoHelper, @@ -36,6 +40,8 @@ use near_client_primitives::types::{ Error, GetNetworkInfo, NetworkInfoResponse, ShardSyncDownload, ShardSyncStatus, Status, StatusError, StatusSyncInfo, SyncStatus, }; +#[cfg(feature = "test_features")] +use near_network::types::NetworkAdversarialMessage; use near_network::types::ReasonForBan; use near_network::types::{ NetworkInfo, NetworkRequests, PeerManagerAdapter, PeerManagerMessageRequest, @@ -260,369 +266,456 @@ impl Actor for ClientActor { } } -impl Handler> for ClientActor { - type Result = NetworkClientResponses; - - #[perf] - fn handle( +impl ClientActor { + fn wrap( &mut self, - msg: WithSpanContext, + msg: WithSpanContext, ctx: &mut Context, - ) -> Self::Result { - let msg_type: &'static str = (&msg.msg).into(); + msg_type: &str, + f: impl FnOnce(&mut Self, Req) -> Res, + ) -> Res { let (_span, msg) = handler_debug_span!(target: "client", msg, msg_type); - self.check_triggers(ctx); - - let _d = delay_detector::DelayDetector::new(|| { - format!("NetworkClientMessage {}", msg.as_ref()).into() - }); - metrics::CLIENT_MESSAGES_COUNT.with_label_values(&[msg.as_ref()]).inc(); - let timer = metrics::CLIENT_MESSAGES_PROCESSING_TIME - .with_label_values(&[msg.as_ref()]) - .start_timer(); - let res = self.handle_client_messages(msg); + let _d = + delay_detector::DelayDetector::new(|| format!("NetworkClientMessage {:?}", msg).into()); + 
metrics::CLIENT_MESSAGES_COUNT.with_label_values(&[msg_type]).inc(); + let timer = + metrics::CLIENT_MESSAGES_PROCESSING_TIME.with_label_values(&[msg_type]).start_timer(); + let res = f(self, msg); timer.observe_duration(); res } } -impl ClientActor { - fn handle_client_messages(&mut self, msg: NetworkClientMessages) -> NetworkClientResponses { - match msg { - #[cfg(feature = "test_features")] - NetworkClientMessages::Adversarial(adversarial_msg) => { - return match adversarial_msg { - near_network::types::NetworkAdversarialMessage::AdvDisableDoomslug => { - info!(target: "adversary", "Turning Doomslug off"); - self.adv.set_disable_doomslug(true); - self.client.doomslug.adv_disable(); - self.client.chain.adv_disable_doomslug(); - NetworkClientResponses::NoResponse - } - near_network::types::NetworkAdversarialMessage::AdvDisableHeaderSync => { - info!(target: "adversary", "Blocking header sync"); - self.adv.set_disable_header_sync(true); - NetworkClientResponses::NoResponse - } - near_network::types::NetworkAdversarialMessage::AdvProduceBlocks( - num_blocks, - only_valid, - ) => { - info!(target: "adversary", "Producing {} blocks", num_blocks); - self.client.adv_produce_blocks = true; - self.client.adv_produce_blocks_only_valid = only_valid; - let start_height = - self.client.chain.mut_store().get_latest_known().unwrap().height + 1; - let mut blocks_produced = 0; - for height in start_height.. 
{ - let block = self - .client - .produce_block(height) - .expect("block should be produced"); - if only_valid && block == None { - continue; - } - let block = block.expect("block should exist after produced"); - info!(target: "adversary", "Producing {} block out of {}, height = {}", blocks_produced, num_blocks, height); - self.network_adapter.do_send( - PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::Block { block: block.clone() }, - ) - .with_span_context(), - ); - let _ = self.client.start_process_block( - block.into(), - Provenance::PRODUCED, - self.get_apply_chunks_done_callback(), - ); - blocks_produced += 1; - if blocks_produced == num_blocks { - break; - } - } - NetworkClientResponses::NoResponse - } - near_network::types::NetworkAdversarialMessage::AdvSwitchToHeight(height) => { - info!(target: "adversary", "Switching to height {:?}", height); - let mut chain_store_update = self.client.chain.mut_store().store_update(); - chain_store_update.save_largest_target_height(height); - chain_store_update - .adv_save_latest_known(height) - .expect("adv method should not fail"); - chain_store_update.commit().expect("adv method should not fail"); - NetworkClientResponses::NoResponse - } - near_network::types::NetworkAdversarialMessage::AdvSetSyncInfo(height) => { - info!(target: "adversary", %height, "AdvSetSyncInfo"); - self.client.adv_sync_height = Some(height); - self.client.send_network_chain_info().expect("adv method should not fail"); - NetworkClientResponses::NoResponse - } - near_network::types::NetworkAdversarialMessage::AdvGetSavedBlocks => { - info!(target: "adversary", "Requested number of saved blocks"); - let store = self.client.chain.store().store(); - let mut num_blocks = 0; - for _ in store.iter(DBCol::Block) { - num_blocks += 1; - } - NetworkClientResponses::AdvResult(num_blocks) - } - near_network::types::NetworkAdversarialMessage::AdvCheckStorageConsistency => { - // timeout is set to 1.5 seconds to give some room as we wait in 
Nightly for 2 seconds - let timeout = 1500; - info!(target: "adversary", "Check Storage Consistency, timeout set to {:?} milliseconds", timeout); - let mut genesis = near_chain_configs::GenesisConfig::default(); - genesis.genesis_height = self.client.chain.store().get_genesis_height(); - let mut store_validator = near_chain::store_validator::StoreValidator::new( - self.client.validator_signer.as_ref().map(|x| x.validator_id().clone()), - genesis, - self.client.runtime_adapter.clone(), - self.client.chain.store().store().clone(), - self.adv.is_archival(), - ); - store_validator.set_timeout(timeout); - store_validator.validate(); - if store_validator.is_failed() { - error!(target: "client", "Storage Validation failed, {:?}", store_validator.errors); - NetworkClientResponses::AdvResult(0) - } else { - NetworkClientResponses::AdvResult(store_validator.tests_done()) - } - } - }; +#[cfg(feature = "test_features")] +impl Handler> for ClientActor { + type Result = Option; + + fn handle( + &mut self, + msg: WithSpanContext, + ctx: &mut Context, + ) -> Self::Result { + self.wrap(msg, ctx, "NetworkAdversarialMessage", |this, msg| match msg { + near_network::types::NetworkAdversarialMessage::AdvDisableDoomslug => { + info!(target: "adversary", "Turning Doomslug off"); + this.adv.set_disable_doomslug(true); + this.client.doomslug.adv_disable(); + this.client.chain.adv_disable_doomslug(); + None } - NetworkClientMessages::Transaction { transaction, is_forwarded, check_only } => { - self.client.process_tx(transaction, is_forwarded, check_only) + near_network::types::NetworkAdversarialMessage::AdvDisableHeaderSync => { + info!(target: "adversary", "Blocking header sync"); + this.adv.set_disable_header_sync(true); + None } - NetworkClientMessages::Block(block, peer_id, was_requested) => { - let blocks_at_height = self - .client - .chain - .store() - .get_all_block_hashes_by_height(block.header().height()); - if was_requested || !blocks_at_height.is_ok() { - if let 
SyncStatus::StateSync(sync_hash, _) = &mut self.client.sync_status { - if let Ok(header) = self.client.chain.get_block_header(sync_hash) { - if block.hash() == header.prev_hash() { - if let Err(e) = self.client.chain.save_block(block.into()) { - error!(target: "client", "Failed to save a block during state sync: {}", e); - } - } else if block.hash() == sync_hash { - // This is the immediate block after a state sync - // We can afford to delay requesting missing chunks for this one block - if let Err(e) = self.client.chain.save_orphan(block.into(), false) { - error!(target: "client", "Received an invalid block during state sync: {}", e); - } - } - return NetworkClientResponses::NoResponse; - } + near_network::types::NetworkAdversarialMessage::AdvProduceBlocks( + num_blocks, + only_valid, + ) => { + info!(target: "adversary", "Producing {} blocks", num_blocks); + this.client.adv_produce_blocks = true; + this.client.adv_produce_blocks_only_valid = only_valid; + let start_height = + this.client.chain.mut_store().get_latest_known().unwrap().height + 1; + let mut blocks_produced = 0; + for height in start_height.. 
{ + let block = this + .client + .produce_block(height) + .expect("block should be produced"); + if only_valid && block == None { + continue; } - self.client.receive_block( - block, - peer_id.clone(), - was_requested, - self.get_apply_chunks_done_callback(), + let block = block.expect("block should exist after produced"); + info!(target: "adversary", "Producing {} block out of {}, height = {}", blocks_produced, num_blocks, height); + this.network_adapter.do_send( + PeerManagerMessageRequest::NetworkRequests( + NetworkRequests::Block { block: block.clone() }, + ) + .with_span_context(), ); - NetworkClientResponses::NoResponse - } else { - match self - .client - .runtime_adapter - .get_epoch_id_from_prev_block(block.header().prev_hash()) - { - Ok(epoch_id) => { - if let Some(hashes) = blocks_at_height.unwrap().get(&epoch_id) { - if !hashes.contains(block.header().hash()) { - warn!(target: "client", "Rejecting unrequested block {}, height {}", block.header().hash(), block.header().height()); - } - } - } - _ => {} + let _ = this.client.start_process_block( + block.into(), + Provenance::PRODUCED, + this.get_apply_chunks_done_callback(), + ); + blocks_produced += 1; + if blocks_produced == num_blocks { + break; } - NetworkClientResponses::NoResponse } + None } - NetworkClientMessages::BlockHeaders(headers, peer_id) => { - if self.receive_headers(headers, peer_id) { - NetworkClientResponses::NoResponse - } else { - warn!(target: "client", "Banning node for sending invalid block headers"); - NetworkClientResponses::Ban { ban_reason: ReasonForBan::BadBlockHeader } - } + near_network::types::NetworkAdversarialMessage::AdvSwitchToHeight(height) => { + info!(target: "adversary", "Switching to height {:?}", height); + let mut chain_store_update = this.client.chain.mut_store().store_update(); + chain_store_update.save_largest_target_height(height); + chain_store_update + .adv_save_latest_known(height) + .expect("adv method should not fail"); + 
chain_store_update.commit().expect("adv method should not fail"); + None } - NetworkClientMessages::BlockApproval(approval, peer_id) => { - debug!(target: "client", "Receive approval {:?} from peer {:?}", approval, peer_id); - self.client.collect_block_approval(&approval, ApprovalType::PeerApproval(peer_id)); - NetworkClientResponses::NoResponse + near_network::types::NetworkAdversarialMessage::AdvSetSyncInfo(height) => { + info!(target: "adversary", %height, "AdvSetSyncInfo"); + this.client.adv_sync_height = Some(height); + this.client.send_network_chain_info().expect("adv method should not fail"); + None } - NetworkClientMessages::StateResponse(state_response_info) => { - let shard_id = state_response_info.shard_id(); - let hash = state_response_info.sync_hash(); - let state_response = state_response_info.take_state_response(); - - trace!(target: "sync", "Received state response shard_id: {} sync_hash: {:?} part(id/size): {:?}", - shard_id, - hash, - state_response.part().as_ref().map(|(part_id, data)| (part_id, data.len())) + near_network::types::NetworkAdversarialMessage::AdvGetSavedBlocks => { + info!(target: "adversary", "Requested number of saved blocks"); + let store = this.client.chain.store().store(); + let mut num_blocks = 0; + for _ in store.iter(DBCol::Block) { + num_blocks += 1; + } + Some(num_blocks) + } + near_network::types::NetworkAdversarialMessage::AdvCheckStorageConsistency => { + // timeout is set to 1.5 seconds to give some room as we wait in Nightly for 2 seconds + let timeout = 1500; + info!(target: "adversary", "Check Storage Consistency, timeout set to {:?} milliseconds", timeout); + let mut genesis = near_chain_configs::GenesisConfig::default(); + genesis.genesis_height = this.client.chain.store().get_genesis_height(); + let mut store_validator = near_chain::store_validator::StoreValidator::new( + this.client.validator_signer.as_ref().map(|x| x.validator_id().clone()), + genesis, + this.client.runtime_adapter.clone(), + 
this.client.chain.store().store().clone(), + this.adv.is_archival(), ); - // Get the download that matches the shard_id and hash - let download = { - let mut download: Option<&mut ShardSyncDownload> = None; - - // ... It could be that the state was requested by the state sync - if let SyncStatus::StateSync(sync_hash, shards_to_download) = - &mut self.client.sync_status - { - if hash == *sync_hash { - if let Some(part_id) = state_response.part_id() { - self.client - .state_sync - .received_requested_part(part_id, shard_id, hash); - } + store_validator.set_timeout(timeout); + store_validator.validate(); + if store_validator.is_failed() { + error!(target: "client", "Storage Validation failed, {:?}", store_validator.errors); + Some(0) + } else { + Some(store_validator.tests_done()) + } + } + }) + } +} + +impl Handler> for ClientActor { + type Result = ProcessTxResponse; + + fn handle( + &mut self, + msg: WithSpanContext, + ctx: &mut Context, + ) -> Self::Result { + self.wrap(msg, ctx, "ProcessTxRequest", |this: &mut Self, msg| { + let ProcessTxRequest { transaction, is_forwarded, check_only } = msg; + this.client.process_tx(transaction, is_forwarded, check_only) + }) + } +} - if let Some(shard_download) = shards_to_download.get_mut(&shard_id) { - assert!( - download.is_none(), - "Internal downloads set has duplicates" - ); - download = Some(shard_download); - } else { - // This may happen because of sending too many StateRequests to different peers. - // For example, we received StateResponse after StateSync completion. 
+impl Handler> for ClientActor { + type Result = (); + + fn handle(&mut self, msg: WithSpanContext, ctx: &mut Context) { + self.wrap(msg,ctx,"BlockResponse",|this:&mut Self,msg|{ + let BlockResponse{ block, peer_id, was_requested } = msg; + let blocks_at_height = this + .client + .chain + .store() + .get_all_block_hashes_by_height(block.header().height()); + if was_requested || !blocks_at_height.is_ok() { + if let SyncStatus::StateSync(sync_hash, _) = &mut this.client.sync_status { + if let Ok(header) = this.client.chain.get_block_header(sync_hash) { + if block.hash() == header.prev_hash() { + if let Err(e) = this.client.chain.save_block(block.into()) { + error!(target: "client", "Failed to save a block during state sync: {}", e); + } + } else if block.hash() == sync_hash { + // This is the immediate block after a state sync + // We can afford to delay requesting missing chunks for this one block + if let Err(e) = this.client.chain.save_orphan(block.into(), false) { + error!(target: "client", "Received an invalid block during state sync: {}", e); } } + return; } + } + this.client.receive_block( + block, + peer_id.clone(), + was_requested, + this.get_apply_chunks_done_callback(), + ); + } else { + match this + .client + .runtime_adapter + .get_epoch_id_from_prev_block(block.header().prev_hash()) + { + Ok(epoch_id) => { + if let Some(hashes) = blocks_at_height.unwrap().get(&epoch_id) { + if !hashes.contains(block.header().hash()) { + warn!(target: "client", "Rejecting unrequested block {}, height {}", block.header().hash(), block.header().height()); + } + } + } + _ => {} + } + } + }) + } +} + +impl Handler> for ClientActor { + type Result = Result<(), ReasonForBan>; + + fn handle( + &mut self, + msg: WithSpanContext, + ctx: &mut Context, + ) -> Self::Result { + self.wrap(msg, ctx, "BlockHeadersResponse", |this, msg| { + let BlockHeadersResponse(headers, peer_id) = msg; + if this.receive_headers(headers, peer_id) { + Ok(()) + } else { + warn!(target: "client", 
"Banning node for sending invalid block headers"); + Err(ReasonForBan::BadBlockHeader) + } + }) + } +} - // ... Or one of the catchups - if let Some((_, shards_to_download, _)) = - self.client.catchup_state_syncs.get_mut(&hash) - { +impl Handler> for ClientActor { + type Result = (); + + fn handle(&mut self, msg: WithSpanContext, ctx: &mut Context) { + self.wrap(msg, ctx, "BlockApproval", |this, msg| { + let BlockApproval(approval, peer_id) = msg; + debug!(target: "client", "Receive approval {:?} from peer {:?}", approval, peer_id); + this.client.collect_block_approval(&approval, ApprovalType::PeerApproval(peer_id)); + }) + } +} + +impl Handler> for ClientActor { + type Result = (); + + fn handle(&mut self, msg: WithSpanContext, ctx: &mut Context) { + self.wrap(msg,ctx,"StateResponse",|this,msg| { + let StateResponse(state_response_info) = msg; + let shard_id = state_response_info.shard_id(); + let hash = state_response_info.sync_hash(); + let state_response = state_response_info.take_state_response(); + + trace!(target: "sync", "Received state response shard_id: {} sync_hash: {:?} part(id/size): {:?}", + shard_id, + hash, + state_response.part().as_ref().map(|(part_id, data)| (part_id, data.len())) + ); + // Get the download that matches the shard_id and hash + let download = { + let mut download: Option<&mut ShardSyncDownload> = None; + + // ... 
It could be that the state was requested by the state sync + if let SyncStatus::StateSync(sync_hash, shards_to_download) = + &mut this.client.sync_status + { + if hash == *sync_hash { if let Some(part_id) = state_response.part_id() { - self.client.state_sync.received_requested_part(part_id, shard_id, hash); + this.client + .state_sync + .received_requested_part(part_id, shard_id, hash); } if let Some(shard_download) = shards_to_download.get_mut(&shard_id) { - assert!(download.is_none(), "Internal downloads set has duplicates"); + assert!( + download.is_none(), + "Internal downloads set has duplicates" + ); download = Some(shard_download); } else { // This may happen because of sending too many StateRequests to different peers. // For example, we received StateResponse after StateSync completion. } } - // We should not be requesting the same state twice. - download - }; - - if let Some(shard_sync_download) = download { - match shard_sync_download.status { - ShardSyncStatus::StateDownloadHeader => { - if let Some(header) = state_response.take_header() { - if !shard_sync_download.downloads[0].done { - match self.client.chain.set_state_header(shard_id, hash, header) - { - Ok(()) => { - shard_sync_download.downloads[0].done = true; - } - Err(err) => { - error!(target: "sync", "State sync set_state_header error, shard = {}, hash = {}: {:?}", shard_id, hash, err); - shard_sync_download.downloads[0].error = true; - } + } + + // ... Or one of the catchups + if let Some((_, shards_to_download, _)) = + this.client.catchup_state_syncs.get_mut(&hash) + { + if let Some(part_id) = state_response.part_id() { + this.client.state_sync.received_requested_part(part_id, shard_id, hash); + } + + if let Some(shard_download) = shards_to_download.get_mut(&shard_id) { + assert!(download.is_none(), "Internal downloads set has duplicates"); + download = Some(shard_download); + } else { + // This may happen because of sending too many StateRequests to different peers. 
+ // For example, we received StateResponse after StateSync completion. + } + } + // We should not be requesting the same state twice. + download + }; + + if let Some(shard_sync_download) = download { + match shard_sync_download.status { + ShardSyncStatus::StateDownloadHeader => { + if let Some(header) = state_response.take_header() { + if !shard_sync_download.downloads[0].done { + match this.client.chain.set_state_header(shard_id, hash, header) + { + Ok(()) => { + shard_sync_download.downloads[0].done = true; + } + Err(err) => { + error!(target: "sync", "State sync set_state_header error, shard = {}, hash = {}: {:?}", shard_id, hash, err); + shard_sync_download.downloads[0].error = true; } } - } else { - // No header found. - // It may happen because requested node couldn't build state response. - if !shard_sync_download.downloads[0].done { - info!(target: "sync", "state_response doesn't have header, should be re-requested, shard = {}, hash = {}", shard_id, hash); - shard_sync_download.downloads[0].error = true; - } + } + } else { + // No header found. + // It may happen because requested node couldn't build state response. 
+ if !shard_sync_download.downloads[0].done { + info!(target: "sync", "state_response doesn't have header, should be re-requested, shard = {}, hash = {}", shard_id, hash); + shard_sync_download.downloads[0].error = true; } } - ShardSyncStatus::StateDownloadParts => { - if let Some(part) = state_response.take_part() { - let num_parts = shard_sync_download.downloads.len() as u64; - let (part_id, data) = part; - if part_id >= num_parts { - error!(target: "sync", "State sync received incorrect part_id # {:?} for hash {:?}, potential malicious peer", part_id, hash); - return NetworkClientResponses::NoResponse; - } - if !shard_sync_download.downloads[part_id as usize].done { - match self.client.chain.set_state_part( - shard_id, - hash, - PartId::new(part_id, num_parts), - &data, - ) { - Ok(()) => { - shard_sync_download.downloads[part_id as usize].done = - true; - } - Err(err) => { - error!(target: "sync", "State sync set_state_part error, shard = {}, part = {}, hash = {}: {:?}", shard_id, part_id, hash, err); - shard_sync_download.downloads[part_id as usize].error = - true; - } + } + ShardSyncStatus::StateDownloadParts => { + if let Some(part) = state_response.take_part() { + let num_parts = shard_sync_download.downloads.len() as u64; + let (part_id, data) = part; + if part_id >= num_parts { + error!(target: "sync", "State sync received incorrect part_id # {:?} for hash {:?}, potential malicious peer", part_id, hash); + return; + } + if !shard_sync_download.downloads[part_id as usize].done { + match this.client.chain.set_state_part( + shard_id, + hash, + PartId::new(part_id, num_parts), + &data, + ) { + Ok(()) => { + shard_sync_download.downloads[part_id as usize].done = + true; + } + Err(err) => { + error!(target: "sync", "State sync set_state_part error, shard = {}, part = {}, hash = {}: {:?}", shard_id, part_id, hash, err); + shard_sync_download.downloads[part_id as usize].error = + true; } } } } - _ => {} } - } else { - error!(target: "sync", "State sync received 
hash {} that we're not expecting, potential malicious peer", hash); + _ => {} } - - NetworkClientResponses::NoResponse - } - NetworkClientMessages::PartialEncodedChunkRequest(part_request_msg, route_back) => { - let _ = self - .client - .shards_mgr - .process_partial_encoded_chunk_request(part_request_msg, route_back); - NetworkClientResponses::NoResponse - } - NetworkClientMessages::PartialEncodedChunkResponse(response, time) => { - PARTIAL_ENCODED_CHUNK_RESPONSE_DELAY.observe(time.elapsed().as_secs_f64()); - let _ = self.client.shards_mgr.process_partial_encoded_chunk_response(response); - NetworkClientResponses::NoResponse - } - NetworkClientMessages::PartialEncodedChunk(partial_encoded_chunk) => { - self.client.block_production_info.record_chunk_collected( - partial_encoded_chunk.height_created(), - partial_encoded_chunk.shard_id(), - ); - let _ = self - .client - .shards_mgr - .process_partial_encoded_chunk(partial_encoded_chunk.into()); - NetworkClientResponses::NoResponse - } - NetworkClientMessages::PartialEncodedChunkForward(forward) => { - match self.client.shards_mgr.process_partial_encoded_chunk_forward(forward) { - Ok(_) => {} - // Unknown chunk is normal if we get parts before the header - Err(near_chunks::Error::UnknownChunk) => (), - Err(err) => { - error!(target: "client", "Error processing forwarded chunk: {}", err) - } - } - NetworkClientResponses::NoResponse + } else { + error!(target: "sync", "State sync received hash {} that we're not expecting, potential malicious peer", hash); } - NetworkClientMessages::Challenge(challenge) => { - match self.client.process_challenge(challenge) { - Ok(_) => {} - Err(err) => { - error!(target: "client", "Error processing challenge: {}", err); - } - } - NetworkClientResponses::NoResponse + }) + } +} + +impl Handler> for ClientActor { + type Result = (); + + fn handle( + &mut self, + msg: WithSpanContext, + ctx: &mut Context, + ) { + self.wrap(msg, ctx, "RecvPartialEncodedChunkRequest", |this, msg| { + let 
RecvPartialEncodedChunkRequest(part_request_msg, route_back) = msg; + let _ = this + .client + .shards_mgr + .process_partial_encoded_chunk_request(part_request_msg, route_back); + }) + } +} + +impl Handler> for ClientActor { + type Result = (); + + fn handle( + &mut self, + msg: WithSpanContext, + ctx: &mut Context, + ) { + self.wrap(msg, ctx, "RecvPartialEncodedChunkResponse", |this, msg| { + let RecvPartialEncodedChunkResponse(response, time) = msg; + PARTIAL_ENCODED_CHUNK_RESPONSE_DELAY.observe(time.elapsed().as_secs_f64()); + let _ = this.client.shards_mgr.process_partial_encoded_chunk_response(response); + }); + } +} + +impl Handler> for ClientActor { + type Result = (); + + fn handle(&mut self, msg: WithSpanContext, ctx: &mut Context) { + self.wrap(msg, ctx, "RecvPartialEncodedChunk", |this, msg| { + let RecvPartialEncodedChunk(partial_encoded_chunk) = msg; + this.client.block_production_info.record_chunk_collected( + partial_encoded_chunk.height_created(), + partial_encoded_chunk.shard_id(), + ); + let _ = + this.client.shards_mgr.process_partial_encoded_chunk(partial_encoded_chunk.into()); + }) + } +} + +impl Handler> for ClientActor { + type Result = (); + + fn handle( + &mut self, + msg: WithSpanContext, + ctx: &mut Context, + ) { + self.wrap(msg, ctx, "RectPartialEncodedChunkForward", |this, msg| { + let RecvPartialEncodedChunkForward(forward) = msg; + match this.client.shards_mgr.process_partial_encoded_chunk_forward(forward) { + Ok(_) => {} + // Unknown chunk is normal if we get parts before the header + Err(near_chunks::Error::UnknownChunk) => (), + Err(err) => error!(target: "client", "Error processing forwarded chunk: {}", err), } - NetworkClientMessages::NetworkInfo(network_info) => { - self.network_info = network_info; - NetworkClientResponses::NoResponse + }) + } +} + +impl Handler> for ClientActor { + type Result = (); + + fn handle(&mut self, msg: WithSpanContext, ctx: &mut Context) { + self.wrap(msg, ctx, "RecvChallenge", |this, msg| { + let 
RecvChallenge(challenge) = msg; + match this.client.process_challenge(challenge) { + Ok(_) => {} + Err(err) => error!(target: "client", "Error processing challenge: {}", err), } - } + }); + } +} + +impl Handler> for ClientActor { + type Result = (); + + fn handle(&mut self, msg: WithSpanContext, ctx: &mut Context) { + self.wrap(msg, ctx, "SetNetworkInfo", |this, msg| { + let SetNetworkInfo(network_info) = msg; + this.network_info = network_info; + }) } } diff --git a/chain/client/src/lib.rs b/chain/client/src/lib.rs index 02bff5d5fa6..fb58149a21d 100644 --- a/chain/client/src/lib.rs +++ b/chain/client/src/lib.rs @@ -9,6 +9,9 @@ pub use near_client_primitives::types::{ pub use near_client_primitives::debug::DebugStatus; +pub use crate::adapter::{ + BlockApproval, BlockResponse, ProcessTxRequest, ProcessTxResponse, SetNetworkInfo, +}; pub use crate::client::Client; pub use crate::client_actor::{start_client, ClientActor}; pub use crate::view_client::{start_view_client, ViewClientActor}; diff --git a/chain/client/src/test_utils.rs b/chain/client/src/test_utils.rs index ee176ef878d..3a3f4479b8e 100644 --- a/chain/client/src/test_utils.rs +++ b/chain/client/src/test_utils.rs @@ -13,10 +13,6 @@ use once_cell::sync::OnceCell; use rand::{thread_rng, Rng}; use tracing::info; -use crate::adapter::{ - NetworkClientMessages, NetworkClientResponses, NetworkViewClientMessages, - NetworkViewClientResponses, -}; use crate::{start_view_client, Client, ClientActor, SyncStatus, ViewClientActor}; use near_chain::chain::{do_apply_chunks, BlockCatchUpRequest, StateSplitRequest}; use near_chain::test_utils::{ @@ -71,6 +67,13 @@ use near_store::test_utils::create_test_store; use near_store::Store; use near_telemetry::TelemetryActor; +use crate::adapter::{ + AnnounceAccountRequest, BlockApproval, BlockHeadersRequest, BlockHeadersResponse, BlockRequest, + BlockResponse, ProcessTxResponse, RecvPartialEncodedChunk, RecvPartialEncodedChunkForward, + RecvPartialEncodedChunkRequest, 
RecvPartialEncodedChunkResponse, SetNetworkInfo, + StateRequestHeader, StateRequestPart, StateResponse, +}; + pub struct PeerManagerMock { handle: Box< dyn FnMut( @@ -504,16 +507,16 @@ fn send_chunks( recipients: I, target: T, drop_chunks: bool, - create_msg: F, + send_to: F, ) where T: Eq, I: Iterator, - F: Fn() -> WithSpanContext, + F: Fn(&Addr), { for (i, name) in recipients { if name == target { if !drop_chunks || !thread_rng().gen_ratio(1, 5) { - connectors[i].0.do_send(create_msg()); + send_to(&connectors[i].0); } } } @@ -677,8 +680,7 @@ pub fn setup_mock_all_validators( known_producers: vec![], tier1_accounts: vec![], }; - client_addr - .do_send(NetworkClientMessages::NetworkInfo(info).with_span_context()); + client_addr.do_send(SetNetworkInfo(info).with_span_context()); } match msg.as_network_requests_ref() { @@ -691,11 +693,11 @@ pub fn setup_mock_all_validators( for (client, _) in connectors1 { client.do_send( - NetworkClientMessages::Block( - block.clone(), - PeerInfo::random().id, - false, - ) + BlockResponse { + block: block.clone(), + peer_id: PeerInfo::random().id, + was_requested: false, + } .with_span_context(), ); } @@ -713,34 +715,28 @@ pub fn setup_mock_all_validators( } NetworkRequests::PartialEncodedChunkRequest { target, request, .. 
} => { let create_msg = || { - NetworkClientMessages::PartialEncodedChunkRequest( - request.clone(), - my_address, - ) - .with_span_context() + RecvPartialEncodedChunkRequest(request.clone(), my_address) + .with_span_context() }; send_chunks( connectors1, validators_clone2.iter().map(|s| Some(s.clone())).enumerate(), target.account_id.as_ref().map(|s| s.clone()), drop_chunks, - create_msg, + |c| c.do_send(create_msg()), ); } NetworkRequests::PartialEncodedChunkResponse { route_back, response } => { let create_msg = || { - NetworkClientMessages::PartialEncodedChunkResponse( - response.clone(), - Clock::instant(), - ) - .with_span_context() + RecvPartialEncodedChunkResponse(response.clone(), Clock::instant()) + .with_span_context() }; send_chunks( connectors1, addresses.iter().enumerate(), route_back, drop_chunks, - create_msg, + |c| c.do_send(create_msg()), ); } NetworkRequests::PartialEncodedChunkMessage { @@ -748,30 +744,27 @@ pub fn setup_mock_all_validators( partial_encoded_chunk, } => { let create_msg = || { - NetworkClientMessages::PartialEncodedChunk( - partial_encoded_chunk.clone().into(), - ) - .with_span_context() + RecvPartialEncodedChunk(partial_encoded_chunk.clone().into()) + .with_span_context() }; send_chunks( connectors1, validators_clone2.iter().cloned().enumerate(), account_id.clone(), drop_chunks, - create_msg, + |c| c.do_send(create_msg()), ); } NetworkRequests::PartialEncodedChunkForward { account_id, forward } => { let create_msg = || { - NetworkClientMessages::PartialEncodedChunkForward(forward.clone()) - .with_span_context() + RecvPartialEncodedChunkForward(forward.clone()).with_span_context() }; send_chunks( connectors1, validators_clone2.iter().cloned().enumerate(), account_id.clone(), drop_chunks, - create_msg, + |c| c.do_send(create_msg()), ); } NetworkRequests::BlockRequest { hash, peer_id } => { @@ -782,23 +775,21 @@ pub fn setup_mock_all_validators( actix::spawn( connectors1[i] .1 - .send( - 
NetworkViewClientMessages::BlockRequest(*hash) - .with_span_context(), - ) + .send(BlockRequest(*hash).with_span_context()) .then(move |response| { let response = response.unwrap(); match response { - NetworkViewClientResponses::Block(block) => { + Some(block) => { me.do_send( - NetworkClientMessages::Block( - *block, peer_id, true, - ) + BlockResponse { + block: *block, + peer_id, + was_requested: true, + } .with_span_context(), ); } - NetworkViewClientResponses::NoResponse => {} - _ => assert!(false), + None => {} } future::ready(()) }), @@ -815,26 +806,19 @@ pub fn setup_mock_all_validators( connectors1[i] .1 .send( - NetworkViewClientMessages::BlockHeadersRequest( - hashes.clone(), - ) - .with_span_context(), + BlockHeadersRequest(hashes.clone()) + .with_span_context(), ) .then(move |response| { let response = response.unwrap(); match response { - NetworkViewClientResponses::BlockHeaders( - headers, - ) => { + Some(headers) => { me.do_send( - NetworkClientMessages::BlockHeaders( - headers, peer_id, - ) - .with_span_context(), + BlockHeadersResponse(headers, peer_id) + .with_span_context(), ); } - NetworkViewClientResponses::NoResponse => {} - _ => assert!(false), + None => {} } future::ready(()) }), @@ -858,7 +842,7 @@ pub fn setup_mock_all_validators( connectors1[i] .1 .send( - NetworkViewClientMessages::StateRequestHeader { + StateRequestHeader { shard_id: *shard_id, sync_hash: *sync_hash, } @@ -867,18 +851,10 @@ pub fn setup_mock_all_validators( .then(move |response| { let response = response.unwrap(); match response { - NetworkViewClientResponses::StateResponse( - response, - ) => { - me.do_send( - NetworkClientMessages::StateResponse( - *response, - ) - .with_span_context(), - ); + Some(response) => { + me.do_send(response.with_span_context()); } - NetworkViewClientResponses::NoResponse => {} - _ => assert!(false), + None => {} } future::ready(()) }), @@ -903,7 +879,7 @@ pub fn setup_mock_all_validators( connectors1[i] .1 .send( - 
NetworkViewClientMessages::StateRequestPart { + StateRequestPart { shard_id: *shard_id, sync_hash: *sync_hash, part_id: *part_id, @@ -913,18 +889,10 @@ pub fn setup_mock_all_validators( .then(move |response| { let response = response.unwrap(); match response { - NetworkViewClientResponses::StateResponse( - response, - ) => { - me.do_send( - NetworkClientMessages::StateResponse( - *response, - ) - .with_span_context(), - ); + Some(response) => { + me.do_send(response.with_span_context()); } - NetworkViewClientResponses::NoResponse => {} - _ => assert!(false), + None => {} } future::ready(()) }), @@ -936,7 +904,7 @@ pub fn setup_mock_all_validators( for (i, address) in addresses.iter().enumerate() { if route_back == address { connectors1[i].0.do_send( - NetworkClientMessages::StateResponse(response.clone()) + StateResponse(Box::new(response.clone())) .with_span_context(), ); } @@ -952,7 +920,7 @@ pub fn setup_mock_all_validators( aa.insert(key); for (_, view_client) in connectors1 { view_client.do_send( - NetworkViewClientMessages::AnnounceAccount(vec![( + AnnounceAccountRequest(vec![( announce_account.clone(), None, )]) @@ -983,11 +951,8 @@ pub fn setup_mock_all_validators( for (i, name) in validators_clone2.iter().enumerate() { if name == &approval_message.target { connectors1[i].0.do_send( - NetworkClientMessages::BlockApproval( - approval.clone(), - my_key_pair.id.clone(), - ) - .with_span_context(), + BlockApproval(approval.clone(), my_key_pair.id.clone()) + .with_span_context(), ); } } @@ -1511,7 +1476,7 @@ impl TestEnv { } } - pub fn send_money(&mut self, id: usize) -> NetworkClientResponses { + pub fn send_money(&mut self, id: usize) -> ProcessTxResponse { let account_id = self.get_client_id(0); let signer = InMemorySigner::from_seed(account_id.clone(), KeyType::ED25519, account_id.as_ref()); diff --git a/chain/client/src/tests/bug_repros.rs b/chain/client/src/tests/bug_repros.rs index adde73db03a..4d12fca60f1 100644 --- a/chain/client/src/tests/bug_repros.rs 
+++ b/chain/client/src/tests/bug_repros.rs @@ -9,7 +9,7 @@ use actix::{Addr, System}; use futures::FutureExt; use rand::{thread_rng, Rng}; -use crate::adapter::NetworkClientMessages; +use crate::adapter::{BlockApproval, BlockResponse, ProcessTxRequest, RecvPartialEncodedChunk}; use crate::test_utils::setup_mock_all_validators; use crate::{ClientActor, GetBlock, ViewClientActor}; use near_actix_test_utils::run_actix; @@ -72,11 +72,11 @@ fn repro_1183() { if let Some(last_block) = last_block.clone() { for (client, _) in connectors1.write().unwrap().iter() { client.do_send( - NetworkClientMessages::Block( - last_block.clone(), - PeerInfo::random().id, - false, - ) + BlockResponse { + block: last_block.clone(), + peer_id: PeerInfo::random().id, + was_requested: false, + } .with_span_context(), ) } @@ -91,7 +91,7 @@ fn repro_1183() { for (i, name) in validators.iter().enumerate() { if name == account_id { connectors1.write().unwrap()[i].0.do_send( - NetworkClientMessages::PartialEncodedChunk( + RecvPartialEncodedChunk( partial_encoded_chunk.clone().into(), ) .with_span_context(), @@ -110,7 +110,7 @@ fn repro_1183() { connectors1.write().unwrap()[account_id_to_shard_id(&from, 4) as usize] .0 .do_send( - NetworkClientMessages::Transaction { + ProcessTxRequest { transaction: SignedTransaction::send_money( block.header().height() * 16 + nonce_delta, from.clone(), @@ -211,11 +211,11 @@ fn test_sync_from_archival_node() { for (i, (client, _)) in conns.iter().enumerate() { if i != 3 { client.do_send( - NetworkClientMessages::Block( - block.clone(), - PeerInfo::random().id, - false, - ) + BlockResponse { + block: block.clone(), + peer_id: PeerInfo::random().id, + was_requested: false, + } .with_span_context(), ) } @@ -229,7 +229,7 @@ fn test_sync_from_archival_node() { for (i, (client, _)) in conns.clone().into_iter().enumerate() { if i != 3 { client.do_send( - NetworkClientMessages::BlockApproval( + BlockApproval( approval_message.approval.clone(), PeerInfo::random().id, ) @@ 
-247,8 +247,12 @@ fn test_sync_from_archival_node() { } for (_, block) in blocks.write().unwrap().drain() { conns[3].0.do_send( - NetworkClientMessages::Block(block, PeerInfo::random().id, false) - .with_span_context(), + BlockResponse { + block, + peer_id: PeerInfo::random().id, + was_requested: false, + } + .with_span_context(), ); } match msg { diff --git a/chain/client/src/tests/catching_up.rs b/chain/client/src/tests/catching_up.rs index 300c08bb207..0b5dd56720b 100644 --- a/chain/client/src/tests/catching_up.rs +++ b/chain/client/src/tests/catching_up.rs @@ -6,7 +6,7 @@ use actix::{Addr, System}; use borsh::{BorshDeserialize, BorshSerialize}; use futures::{future, FutureExt}; -use crate::adapter::NetworkClientMessages; +use crate::adapter::ProcessTxRequest; use crate::test_utils::setup_mock_all_validators; use crate::{ClientActor, Query, ViewClientActor}; use near_actix_test_utils::run_actix; @@ -71,7 +71,7 @@ fn send_tx( ) { let signer = InMemorySigner::from_seed("test1".parse().unwrap(), KeyType::ED25519, "test1"); connector.do_send( - NetworkClientMessages::Transaction { + ProcessTxRequest { transaction: SignedTransaction::send_money( nonce, from, to, &signer, amount, block_hash, ), diff --git a/chain/client/src/tests/consensus.rs b/chain/client/src/tests/consensus.rs index 80f0792a435..c45c8ce341c 100644 --- a/chain/client/src/tests/consensus.rs +++ b/chain/client/src/tests/consensus.rs @@ -5,7 +5,7 @@ use actix::{Addr, System}; use near_chain::test_utils::ValidatorSchedule; use rand::{thread_rng, Rng}; -use crate::adapter::NetworkClientMessages; +use crate::adapter::{BlockApproval, BlockResponse}; use crate::test_utils::setup_mock_all_validators; use crate::{ClientActor, ViewClientActor}; use near_actix_test_utils::run_actix; @@ -155,11 +155,11 @@ fn test_consensus_with_epoch_switches() { if delayed_block.header().height() <= block.header().height() + 2 { for target_ord in 0..24 { connectors1.write().unwrap()[target_ord].0.do_send( - 
NetworkClientMessages::Block( - delayed_block.clone(), - key_pairs[0].clone().id, - true, - ) + BlockResponse { + block: delayed_block.clone(), + peer_id: key_pairs[0].clone().id, + was_requested: true, + } .with_span_context(), ); } @@ -257,11 +257,8 @@ fn test_consensus_with_epoch_switches() { [epoch_id * 8 + (destination_ord + delta) % 8] .0 .do_send( - NetworkClientMessages::BlockApproval( - approval, - key_pairs[my_ord].id.clone(), - ) - .with_span_context(), + BlockApproval(approval, key_pairs[my_ord].id.clone()) + .with_span_context(), ); // Do not send the endorsement for couple block producers in each epoch // This is needed because otherwise the block with enough endorsements diff --git a/chain/client/src/tests/cross_shard_tx.rs b/chain/client/src/tests/cross_shard_tx.rs index 16611caac23..6584b199f8b 100644 --- a/chain/client/src/tests/cross_shard_tx.rs +++ b/chain/client/src/tests/cross_shard_tx.rs @@ -7,7 +7,6 @@ use std::sync::{Arc, RwLock}; use actix::{Addr, MailboxError, System}; use futures::{future, FutureExt}; -use crate::adapter::{NetworkClientMessages, NetworkClientResponses}; use near_actix_test_utils::run_actix; use near_chain::test_utils::{account_id_to_shard_id, ValidatorSchedule}; use near_crypto::{InMemorySigner, KeyType}; @@ -23,6 +22,7 @@ use near_primitives::types::{AccountId, BlockReference}; use near_primitives::views::QueryResponseKind::ViewAccount; use near_primitives::views::{QueryRequest, QueryResponse}; +use crate::adapter::{ProcessTxRequest, ProcessTxResponse}; use crate::test_utils::{setup_mock_all_validators, BlockStats}; use crate::{ClientActor, Query, ViewClientActor}; @@ -111,7 +111,7 @@ fn send_tx( connectors.write().unwrap()[connector_ordinal] .0 .send( - NetworkClientMessages::Transaction { + ProcessTxRequest { transaction: SignedTransaction::send_money( nonce, from.clone(), @@ -127,7 +127,7 @@ fn send_tx( ) .then(move |x| { match x.unwrap() { - NetworkClientResponses::NoResponse | NetworkClientResponses::RequestRouted 
=> { + ProcessTxResponse::NoResponse | ProcessTxResponse::RequestRouted => { assert_eq!(num_validators, 24); send_tx( num_validators, @@ -140,7 +140,7 @@ fn send_tx( block_hash, ); } - NetworkClientResponses::ValidTx => { + ProcessTxResponse::ValidTx => { println!("Transaction was received by validator {:?}", connector_ordinal); } other @ _ => { diff --git a/chain/client/src/tests/query_client.rs b/chain/client/src/tests/query_client.rs index bde0c3444e5..5ff0e4ff0dc 100644 --- a/chain/client/src/tests/query_client.rs +++ b/chain/client/src/tests/query_client.rs @@ -5,10 +5,7 @@ use near_primitives::merkle::PartialMerkleTree; use std::sync::Arc; use std::time::Duration; -use crate::adapter::{ - NetworkClientMessages, NetworkClientResponses, NetworkViewClientMessages, - NetworkViewClientResponses, -}; +use crate::adapter::{BlockResponse, ProcessTxRequest, ProcessTxResponse, StateRequestHeader}; use crate::test_utils::{setup_mock_all_validators, setup_no_network, setup_only_view}; use crate::{ GetBlock, GetBlockWithMerkleTree, GetExecutionOutcomesForBlock, Query, QueryError, Status, @@ -106,8 +103,12 @@ fn query_status_not_crash() { actix::spawn( client .send( - NetworkClientMessages::Block(next_block, PeerInfo::random().id, false) - .with_span_context(), + BlockResponse { + block: next_block, + peer_id: PeerInfo::random().id, + was_requested: false, + } + .with_span_context(), ) .then(move |_| { actix::spawn( @@ -159,16 +160,12 @@ fn test_execution_outcome_for_chunk() { let tx_hash = transaction.get_hash(); let res = client .send( - NetworkClientMessages::Transaction { - transaction, - is_forwarded: false, - check_only: false, - } - .with_span_context(), + ProcessTxRequest { transaction, is_forwarded: false, check_only: false } + .with_span_context(), ) .await .unwrap(); - assert!(matches!(res, NetworkClientResponses::ValidTx)); + assert!(matches!(res, ProcessTxResponse::ValidTx)); actix::clock::sleep(Duration::from_millis(500)).await; let block_hash = view_client 
@@ -233,41 +230,26 @@ fn test_state_request() { for _ in 0..30 { let res = view_client .send( - NetworkViewClientMessages::StateRequestHeader { - shard_id: 0, - sync_hash: block_hash, - } - .with_span_context(), + StateRequestHeader { shard_id: 0, sync_hash: block_hash } + .with_span_context(), ) .await .unwrap(); - assert!(matches!(res, NetworkViewClientResponses::StateResponse(_))); + assert!(res.is_some()); } // immediately query again, should be rejected let res = view_client - .send( - NetworkViewClientMessages::StateRequestHeader { - shard_id: 0, - sync_hash: block_hash, - } - .with_span_context(), - ) + .send(StateRequestHeader { shard_id: 0, sync_hash: block_hash }.with_span_context()) .await .unwrap(); - assert!(matches!(res, NetworkViewClientResponses::NoResponse)); + assert!(res.is_none()); actix::clock::sleep(Duration::from_secs(40)).await; let res = view_client - .send( - NetworkViewClientMessages::StateRequestHeader { - shard_id: 0, - sync_hash: block_hash, - } - .with_span_context(), - ) + .send(StateRequestHeader { shard_id: 0, sync_hash: block_hash }.with_span_context()) .await .unwrap(); - assert!(matches!(res, NetworkViewClientResponses::StateResponse(_))); + assert!(res.is_some()); System::current().stop(); }); near_network::test_utils::wait_or_panic(50000); diff --git a/chain/client/src/view_client.rs b/chain/client/src/view_client.rs index dde48aedd87..a2a595fbe2b 100644 --- a/chain/client/src/view_client.rs +++ b/chain/client/src/view_client.rs @@ -12,7 +12,6 @@ use std::time::{Duration, Instant}; use tracing::{debug, error, info, trace, warn}; -use crate::adapter::{NetworkViewClientMessages, NetworkViewClientResponses}; use near_chain::{ get_epoch_block_producers_view, Chain, ChainGenesis, ChainStoreAccess, DoomslugThresholdMode, RuntimeAdapter, @@ -54,6 +53,10 @@ use near_primitives::views::{ QueryRequest, QueryResponse, ReceiptView, StateChangesKindsView, StateChangesView, }; +use crate::adapter::{ + AnnounceAccountRequest, 
BlockHeadersRequest, BlockRequest, StateRequestHeader, + StateRequestPart, StateResponse, TxStatusRequest, TxStatusResponse, +}; use crate::{ metrics, sync, GetChunk, GetExecutionOutcomeResponse, GetNextLightClientBlock, GetStateChanges, GetStateChangesInBlock, GetValidatorInfo, GetValidatorOrdered, @@ -1013,247 +1016,332 @@ impl Handler> for ViewClientActor { } } -impl Handler> for ViewClientActor { - type Result = NetworkViewClientResponses; +#[cfg(feature = "test_features")] +impl Handler> for ViewClientActor { + type Result = Option; #[perf] fn handle( &mut self, - msg: WithSpanContext, + msg: WithSpanContext, _ctx: &mut Self::Context, ) -> Self::Result { let (_span, msg) = handler_debug_span!(target: "client", msg); - let _timer = - metrics::VIEW_CLIENT_MESSAGE_TIME.with_label_values(&[(&msg).into()]).start_timer(); + let _timer = metrics::VIEW_CLIENT_MESSAGE_TIME + .with_label_values(&["NetworkAdversarialMessage"]) + .start_timer(); match msg { - #[cfg(feature = "test_features")] - NetworkViewClientMessages::Adversarial(adversarial_msg) => { - return match adversarial_msg { - NetworkAdversarialMessage::AdvDisableDoomslug => { - info!(target: "adversary", "Turning Doomslug off"); - self.adv.set_disable_doomslug(true); - self.chain.adv_disable_doomslug(); - NetworkViewClientResponses::NoResponse - } - NetworkAdversarialMessage::AdvDisableHeaderSync => { - info!(target: "adversary", "Blocking header sync"); - self.adv.set_disable_header_sync(true); - NetworkViewClientResponses::NoResponse - } - NetworkAdversarialMessage::AdvSwitchToHeight(height) => { - info!(target: "adversary", "Switching to height"); - let mut chain_store_update = self.chain.mut_store().store_update(); - chain_store_update.save_largest_target_height(height); - chain_store_update - .adv_save_latest_known(height) - .expect("adv method should not fail"); - chain_store_update.commit().expect("adv method should not fail"); - NetworkViewClientResponses::NoResponse - } - _ => panic!("invalid 
adversary message"), - } + NetworkAdversarialMessage::AdvDisableDoomslug => { + info!(target: "adversary", "Turning Doomslug off"); + self.adv.set_disable_doomslug(true); } - NetworkViewClientMessages::TxStatus { tx_hash, signer_account_id } => { - if let Ok(Some(result)) = self.get_tx_status(tx_hash, signer_account_id, false) { - NetworkViewClientResponses::TxStatus(Box::new(result.into_outcome())) - } else { - NetworkViewClientResponses::NoResponse - } + NetworkAdversarialMessage::AdvDisableHeaderSync => { + info!(target: "adversary", "Blocking header sync"); + self.adv.set_disable_header_sync(true); } - NetworkViewClientMessages::TxStatusResponse(tx_result) => { - let tx_hash = tx_result.transaction_outcome.id; - let mut request_manager = self.request_manager.write().expect(POISONED_LOCK_ERR); - if request_manager.tx_status_requests.pop(&tx_hash).is_some() { - request_manager.tx_status_response.put(tx_hash, *tx_result); - } - NetworkViewClientResponses::NoResponse - } - NetworkViewClientMessages::BlockRequest(hash) => { - if let Ok(block) = self.chain.get_block(&hash) { - NetworkViewClientResponses::Block(Box::new(block)) - } else { - NetworkViewClientResponses::NoResponse - } - } - NetworkViewClientMessages::BlockHeadersRequest(hashes) => { - if self.adv.disable_header_sync() { - NetworkViewClientResponses::NoResponse - } else if let Ok(headers) = self.retrieve_headers(hashes) { - NetworkViewClientResponses::BlockHeaders(headers) - } else { - NetworkViewClientResponses::NoResponse - } + NetworkAdversarialMessage::AdvSwitchToHeight(height) => { + info!(target: "adversary", "Switching to height"); + let mut chain_store_update = self.chain.mut_store().store_update(); + chain_store_update.save_largest_target_height(height); + chain_store_update + .adv_save_latest_known(height) + .expect("adv method should not fail"); + chain_store_update.commit().expect("adv method should not fail"); } - NetworkViewClientMessages::StateRequestHeader { shard_id, sync_hash } => { - 
if !self.check_state_sync_request() { - return NetworkViewClientResponses::NoResponse; - } + _ => panic!("invalid adversary message"), + } + None + } +} - let state_response = match self.chain.check_sync_hash_validity(&sync_hash) { - Ok(true) => { - let header = match self.chain.get_state_response_header(shard_id, sync_hash) - { - Ok(header) => Some(header), - Err(e) => { - error!(target: "sync", "Cannot build sync header (get_state_response_header): {}", e); - None - } - }; - match header { - None => ShardStateSyncResponse::V1(ShardStateSyncResponseV1 { - header: None, - part: None, - }), - Some(ShardStateSyncResponseHeader::V1(header)) => { - ShardStateSyncResponse::V1(ShardStateSyncResponseV1 { - header: Some(header), - part: None, - }) - } - Some(ShardStateSyncResponseHeader::V2(header)) => { - ShardStateSyncResponse::V2(ShardStateSyncResponseV2 { - header: Some(header), - part: None, - }) - } - } - } - Ok(false) => { - warn!(target: "sync", "sync_hash {:?} didn't pass validation, possible malicious behavior", sync_hash); - return NetworkViewClientResponses::NoResponse; +impl Handler> for ViewClientActor { + type Result = Option>; + + #[perf] + fn handle( + &mut self, + msg: WithSpanContext, + _ctx: &mut Self::Context, + ) -> Self::Result { + let (_span, msg) = handler_debug_span!(target: "client", msg); + let _timer = + metrics::VIEW_CLIENT_MESSAGE_TIME.with_label_values(&["TxStatusRequest"]).start_timer(); + let TxStatusRequest { tx_hash, signer_account_id } = msg; + if let Ok(Some(result)) = self.get_tx_status(tx_hash, signer_account_id, false) { + Some(Box::new(result.into_outcome())) + } else { + None + } + } +} + +impl Handler> for ViewClientActor { + type Result = (); + + #[perf] + fn handle( + &mut self, + msg: WithSpanContext, + _ctx: &mut Self::Context, + ) -> Self::Result { + let (_span, msg) = handler_debug_span!(target: "client", msg); + let _timer = metrics::VIEW_CLIENT_MESSAGE_TIME + .with_label_values(&["TxStatusResponse"]) + .start_timer(); + 
let TxStatusResponse(tx_result) = msg; + let tx_hash = tx_result.transaction_outcome.id; + let mut request_manager = self.request_manager.write().expect(POISONED_LOCK_ERR); + if request_manager.tx_status_requests.pop(&tx_hash).is_some() { + request_manager.tx_status_response.put(tx_hash, *tx_result); + } + } +} + +impl Handler> for ViewClientActor { + type Result = Option>; + + #[perf] + fn handle( + &mut self, + msg: WithSpanContext, + _ctx: &mut Self::Context, + ) -> Self::Result { + let (_span, msg) = handler_debug_span!(target: "client", msg); + let _timer = + metrics::VIEW_CLIENT_MESSAGE_TIME.with_label_values(&["BlockRequest"]).start_timer(); + let BlockRequest(hash) = msg; + if let Ok(block) = self.chain.get_block(&hash) { + Some(Box::new(block)) + } else { + None + } + } +} + +impl Handler> for ViewClientActor { + type Result = Option>; + + #[perf] + fn handle( + &mut self, + msg: WithSpanContext, + _ctx: &mut Self::Context, + ) -> Self::Result { + let (_span, msg) = handler_debug_span!(target: "client", msg); + let _timer = metrics::VIEW_CLIENT_MESSAGE_TIME + .with_label_values(&["BlockHeadersRequest"]) + .start_timer(); + let BlockHeadersRequest(hashes) = msg; + + if self.adv.disable_header_sync() { + None + } else if let Ok(headers) = self.retrieve_headers(hashes) { + Some(headers) + } else { + None + } + } +} + +impl Handler> for ViewClientActor { + type Result = Option; + + #[perf] + fn handle( + &mut self, + msg: WithSpanContext, + _ctx: &mut Self::Context, + ) -> Self::Result { + let (_span, msg) = handler_debug_span!(target: "client", msg); + let _timer = metrics::VIEW_CLIENT_MESSAGE_TIME + .with_label_values(&["StateRequestHeader"]) + .start_timer(); + let StateRequestHeader { shard_id, sync_hash } = msg; + if !self.check_state_sync_request() { + return None; + } + let state_response = match self.chain.check_sync_hash_validity(&sync_hash) { + Ok(true) => { + let header = match self.chain.get_state_response_header(shard_id, sync_hash) { + Ok(header) 
=> Some(header), + Err(e) => { + error!(target: "sync", "Cannot build sync header (get_state_response_header): {}", e); + None } - Err(e) => match e { - near_chain::Error::DBNotFoundErr(_) => { - // This case may appear in case of latency in epoch switching. - // Request sender is ready to sync but we still didn't get the block. - info!(target: "sync", "Can't get sync_hash block {:?} for state request header", sync_hash); - ShardStateSyncResponse::V1(ShardStateSyncResponseV1 { - header: None, - part: None, - }) - } - _ => { - error!(target: "sync", "Failed to verify sync_hash {:?} validity, {:?}", sync_hash, e); - ShardStateSyncResponse::V1(ShardStateSyncResponseV1 { - header: None, - part: None, - }) - } - }, }; - match state_response { - ShardStateSyncResponse::V1(state_response) => { - let info = StateResponseInfo::V1(StateResponseInfoV1 { - shard_id, - sync_hash, - state_response, - }); - NetworkViewClientResponses::StateResponse(Box::new(info)) + match header { + None => ShardStateSyncResponse::V1(ShardStateSyncResponseV1 { + header: None, + part: None, + }), + Some(ShardStateSyncResponseHeader::V1(header)) => { + ShardStateSyncResponse::V1(ShardStateSyncResponseV1 { + header: Some(header), + part: None, + }) } - state_response @ ShardStateSyncResponse::V2(_) => { - let info = StateResponseInfo::V2(StateResponseInfoV2 { - shard_id, - sync_hash, - state_response, - }); - NetworkViewClientResponses::StateResponse(Box::new(info)) + Some(ShardStateSyncResponseHeader::V2(header)) => { + ShardStateSyncResponse::V2(ShardStateSyncResponseV2 { + header: Some(header), + part: None, + }) } } } - NetworkViewClientMessages::StateRequestPart { shard_id, sync_hash, part_id } => { - if !self.check_state_sync_request() { - return NetworkViewClientResponses::NoResponse; + Ok(false) => { + warn!(target: "sync", "sync_hash {:?} didn't pass validation, possible malicious behavior", sync_hash); + return None; + } + Err(e) => match e { + near_chain::Error::DBNotFoundErr(_) => { + // 
This case may appear in case of latency in epoch switching. + // Request sender is ready to sync but we still didn't get the block. + info!(target: "sync", "Can't get sync_hash block {:?} for state request header", sync_hash); + ShardStateSyncResponse::V1(ShardStateSyncResponseV1 { + header: None, + part: None, + }) } - trace!(target: "sync", "Computing state request part {} {} {}", shard_id, sync_hash, part_id); - let state_response = match self.chain.check_sync_hash_validity(&sync_hash) { - Ok(true) => { - let part = match self - .chain - .get_state_response_part(shard_id, part_id, sync_hash) - { - Ok(part) => Some((part_id, part)), - Err(e) => { - error!(target: "sync", "Cannot build sync part #{:?} (get_state_response_part): {}", part_id, e); - None - } - }; - - trace!(target: "sync", "Finish computation for state request part {} {} {}", shard_id, sync_hash, part_id); - ShardStateSyncResponseV1 { header: None, part } - } - Ok(false) => { - warn!(target: "sync", "sync_hash {:?} didn't pass validation, possible malicious behavior", sync_hash); - return NetworkViewClientResponses::NoResponse; - } - Err(e) => match e { - near_chain::Error::DBNotFoundErr(_) => { - // This case may appear in case of latency in epoch switching. - // Request sender is ready to sync but we still didn't get the block. 
- info!(target: "sync", "Can't get sync_hash block {:?} for state request part", sync_hash); - ShardStateSyncResponseV1 { header: None, part: None } - } - _ => { - error!(target: "sync", "Failed to verify sync_hash {:?} validity, {:?}", sync_hash, e); - ShardStateSyncResponseV1 { header: None, part: None } - } - }, - }; + _ => { + error!(target: "sync", "Failed to verify sync_hash {:?} validity, {:?}", sync_hash, e); + ShardStateSyncResponse::V1(ShardStateSyncResponseV1 { + header: None, + part: None, + }) + } + }, + }; + match state_response { + ShardStateSyncResponse::V1(state_response) => { let info = StateResponseInfo::V1(StateResponseInfoV1 { shard_id, sync_hash, state_response, }); - NetworkViewClientResponses::StateResponse(Box::new(info)) + Some(StateResponse(Box::new(info))) } - NetworkViewClientMessages::AnnounceAccount(announce_accounts) => { - let mut filtered_announce_accounts = Vec::new(); - - for (announce_account, last_epoch) in announce_accounts { - // Keep the announcement if it is newer than the last announcement from - // the same account. - if let Some(last_epoch) = last_epoch { - match self - .runtime_adapter - .compare_epoch_id(&announce_account.epoch_id, &last_epoch) - { - Ok(Ordering::Greater) => {} - _ => continue, - } - } + state_response @ ShardStateSyncResponse::V2(_) => { + let info = StateResponseInfo::V2(StateResponseInfoV2 { + shard_id, + sync_hash, + state_response, + }); + Some(StateResponse(Box::new(info))) + } + } + } +} - match self.check_signature_account_announce(&announce_account) { - Ok(true) => { - filtered_announce_accounts.push(announce_account); - } - // TODO(gprusak): Here we ban for broadcasting accounts which have been slashed - // according to BlockInfo for the current chain tip. It is unfair, - // given that peers do not have perfectly synchronized heads: - // - AFAIU each block can introduce a slashed account, so the announcement - // could be OK at the moment that peer has sent it out. 
- // - the current epoch_id is not related to announce_account.epoch_id, - // so it carry a perfectly valid (outdated) information. - Ok(false) => { - return NetworkViewClientResponses::Ban { - ban_reason: ReasonForBan::InvalidSignature, - }; - } - // Filter out this account. This covers both good reasons to ban the peer: - // - signature didn't match the data and public_key. - // - account is not a validator for the given epoch - // and cases when we were just unable to validate the data (so we shouldn't - // ban), for example when the node is not aware of the public key for the given - // (account_id,epoch_id) pair. - // We currently do NOT ban the peer for either. - // TODO(gprusak): consider whether we should change that. - Err(e) => { - debug!(target: "view_client", "Failed to validate account announce signature: {}", e); - } +impl Handler> for ViewClientActor { + type Result = Option; + + #[perf] + fn handle( + &mut self, + msg: WithSpanContext, + _ctx: &mut Self::Context, + ) -> Self::Result { + let (_span, msg) = handler_debug_span!(target: "client", msg); + let _timer = metrics::VIEW_CLIENT_MESSAGE_TIME + .with_label_values(&["StateRequestPart"]) + .start_timer(); + let StateRequestPart { shard_id, sync_hash, part_id } = msg; + if !self.check_state_sync_request() { + return None; + } + trace!(target: "sync", "Computing state request part {} {} {}", shard_id, sync_hash, part_id); + let state_response = match self.chain.check_sync_hash_validity(&sync_hash) { + Ok(true) => { + let part = match self.chain.get_state_response_part(shard_id, part_id, sync_hash) { + Ok(part) => Some((part_id, part)), + Err(e) => { + error!(target: "sync", "Cannot build sync part #{:?} (get_state_response_part): {}", part_id, e); + None } + }; + + trace!(target: "sync", "Finish computation for state request part {} {} {}", shard_id, sync_hash, part_id); + ShardStateSyncResponseV1 { header: None, part } + } + Ok(false) => { + warn!(target: "sync", "sync_hash {:?} didn't pass 
validation, possible malicious behavior", sync_hash); + return None; + } + Err(e) => match e { + near_chain::Error::DBNotFoundErr(_) => { + // This case may appear in case of latency in epoch switching. + // Request sender is ready to sync but we still didn't get the block. + info!(target: "sync", "Can't get sync_hash block {:?} for state request part", sync_hash); + ShardStateSyncResponseV1 { header: None, part: None } + } + _ => { + error!(target: "sync", "Failed to verify sync_hash {:?} validity, {:?}", sync_hash, e); + ShardStateSyncResponseV1 { header: None, part: None } + } + }, + }; + let info = + StateResponseInfo::V1(StateResponseInfoV1 { shard_id, sync_hash, state_response }); + Some(StateResponse(Box::new(info))) + } +} + +impl Handler> for ViewClientActor { + type Result = Result, ReasonForBan>; + + #[perf] + fn handle( + &mut self, + msg: WithSpanContext, + _ctx: &mut Self::Context, + ) -> Self::Result { + let (_span, msg) = handler_debug_span!(target: "client", msg); + let _timer = metrics::VIEW_CLIENT_MESSAGE_TIME + .with_label_values(&["AnnounceAccountRequest"]) + .start_timer(); + let AnnounceAccountRequest(announce_accounts) = msg; + + let mut filtered_announce_accounts = Vec::new(); + + for (announce_account, last_epoch) in announce_accounts { + // Keep the announcement if it is newer than the last announcement from + // the same account. + if let Some(last_epoch) = last_epoch { + match self.runtime_adapter.compare_epoch_id(&announce_account.epoch_id, &last_epoch) + { + Ok(Ordering::Greater) => {} + _ => continue, } + } - NetworkViewClientResponses::AnnounceAccount(filtered_announce_accounts) + match self.check_signature_account_announce(&announce_account) { + Ok(true) => { + filtered_announce_accounts.push(announce_account); + } + // TODO(gprusak): Here we ban for broadcasting accounts which have been slashed + // according to BlockInfo for the current chain tip. 
It is unfair, + // given that peers do not have perfectly synchronized heads: + // - AFAIU each block can introduce a slashed account, so the announcement + // could be OK at the moment that peer has sent it out. + // - the current epoch_id is not related to announce_account.epoch_id, + // so it carry a perfectly valid (outdated) information. + Ok(false) => { + return Err(ReasonForBan::InvalidSignature); + } + // Filter out this account. This covers both good reasons to ban the peer: + // - signature didn't match the data and public_key. + // - account is not a validator for the given epoch + // and cases when we were just unable to validate the data (so we shouldn't + // ban), for example when the node is not aware of the public key for the given + // (account_id,epoch_id) pair. + // We currently do NOT ban the peer for either. + // TODO(gprusak): consider whether we should change that. + Err(e) => { + debug!(target: "view_client", "Failed to validate account announce signature: {}", e); + } } } + Ok(filtered_announce_accounts) } } diff --git a/chain/jsonrpc/src/lib.rs b/chain/jsonrpc/src/lib.rs index 1d99dba85f6..716c2a8a621 100644 --- a/chain/jsonrpc/src/lib.rs +++ b/chain/jsonrpc/src/lib.rs @@ -17,12 +17,11 @@ use tokio::time::{sleep, timeout}; use tracing::info; use near_chain_configs::GenesisConfig; -use near_client::adapter::{NetworkClientMessages, NetworkClientResponses}; use near_client::{ ClientActor, DebugStatus, GetBlock, GetBlockProof, GetChunk, GetExecutionOutcome, GetGasPrice, GetNetworkInfo, GetNextLightClientBlock, GetProtocolConfig, GetReceipt, GetStateChanges, - GetStateChangesInBlock, GetValidatorInfo, GetValidatorOrdered, Query, Status, TxStatus, - ViewClientActor, + GetStateChangesInBlock, GetValidatorInfo, GetValidatorOrdered, ProcessTxRequest, + ProcessTxResponse, Query, Status, TxStatus, ViewClientActor, }; pub use near_jsonrpc_client as client; use near_jsonrpc_primitives::errors::RpcError; @@ -139,11 +138,11 @@ where 
#[easy_ext::ext(FromNetworkClientResponses)] impl near_jsonrpc_primitives::types::transactions::RpcTransactionError { - pub fn from_network_client_responses(responses: NetworkClientResponses) -> Self { - match responses { - NetworkClientResponses::InvalidTx(context) => Self::InvalidTransaction { context }, - NetworkClientResponses::NoResponse => Self::TimeoutError, - NetworkClientResponses::DoesNotTrackShard | NetworkClientResponses::RequestRouted => { + pub fn from_network_client_responses(resp: ProcessTxResponse) -> Self { + match resp { + ProcessTxResponse::InvalidTx(context) => Self::InvalidTransaction { context }, + ProcessTxResponse::NoResponse => Self::TimeoutError, + ProcessTxResponse::DoesNotTrackShard | ProcessTxResponse::RequestRouted => { Self::DoesNotTrackShard } internal_error => Self::InternalError { debug_info: format!("{:?}", internal_error) }, @@ -441,7 +440,7 @@ impl JsonRpcHandler { let tx = request_data.signed_transaction; let hash = tx.get_hash().clone(); self.client_addr.do_send( - NetworkClientMessages::Transaction { + ProcessTxRequest { transaction: tx, is_forwarded: false, check_only: false, // if we set true here it will not actually send the transaction @@ -525,7 +524,7 @@ impl JsonRpcHandler { .. 
}) => { if let near_jsonrpc_primitives::types::transactions::TransactionInfo::Transaction(tx) = &tx_info { - if let Ok(NetworkClientResponses::InvalidTx(context)) = + if let Ok(ProcessTxResponse::InvalidTx(context)) = self.send_tx(tx.clone(), true).await { break Err( @@ -601,21 +600,15 @@ impl JsonRpcHandler { &self, tx: SignedTransaction, check_only: bool, - ) -> Result< - NetworkClientResponses, - near_jsonrpc_primitives::types::transactions::RpcTransactionError, - > { + ) -> Result + { let tx_hash = tx.get_hash(); let signer_account_id = tx.transaction.signer_id.clone(); let response = self .client_addr .send( - NetworkClientMessages::Transaction { - transaction: tx, - is_forwarded: false, - check_only, - } - .with_span_context(), + ProcessTxRequest { transaction: tx, is_forwarded: false, check_only } + .with_span_context(), ) .await .map_err(RpcFrom::rpc_from)?; @@ -623,12 +616,12 @@ impl JsonRpcHandler { // If we receive InvalidNonce error, it might be the case that the transaction was // resubmitted, and we should check if that is the case and return ValidTx response to // maintain idempotence of the send_tx method. - if let NetworkClientResponses::InvalidTx( + if let ProcessTxResponse::InvalidTx( near_primitives::errors::InvalidTxError::InvalidNonce { .. }, ) = response { if self.tx_exists(tx_hash, &signer_account_id).await? { - return Ok(NetworkClientResponses::ValidTx); + return Ok(ProcessTxResponse::ValidTx); } } @@ -643,12 +636,12 @@ impl JsonRpcHandler { near_jsonrpc_primitives::types::transactions::RpcTransactionError, > { match self.send_tx(request_data.clone().signed_transaction, false).await? 
{ - NetworkClientResponses::ValidTx => { + ProcessTxResponse::ValidTx => { Ok(near_jsonrpc_primitives::types::transactions::RpcBroadcastTxSyncResponse { transaction_hash: request_data.signed_transaction.get_hash(), }) } - NetworkClientResponses::RequestRouted => { + ProcessTxResponse::RequestRouted => { Err(near_jsonrpc_primitives::types::transactions::RpcTransactionError::RequestRouted { transaction_hash: request_data.signed_transaction.get_hash(), }) @@ -669,19 +662,19 @@ impl JsonRpcHandler { near_jsonrpc_primitives::types::transactions::RpcTransactionError, > { match self.send_tx(request_data.clone().signed_transaction, true).await? { - NetworkClientResponses::ValidTx => { + ProcessTxResponse::ValidTx => { Ok(near_jsonrpc_primitives::types::transactions::RpcBroadcastTxSyncResponse { transaction_hash: request_data.signed_transaction.get_hash(), }) } - NetworkClientResponses::RequestRouted => { + ProcessTxResponse::RequestRouted => { Err(near_jsonrpc_primitives::types::transactions::RpcTransactionError::RequestRouted { transaction_hash: request_data.signed_transaction.get_hash(), }) } - network_client_responses => Err( + resp => Err( near_jsonrpc_primitives::types::transactions::RpcTransactionError::from_network_client_responses( - network_client_responses + resp ) ) } @@ -717,7 +710,7 @@ impl JsonRpcHandler { _ => {} } match self.send_tx(tx.clone(), false).await? 
{ - NetworkClientResponses::ValidTx | NetworkClientResponses::RequestRouted => { + ProcessTxResponse::ValidTx | ProcessTxResponse::RequestRouted => { self.tx_polling(near_jsonrpc_primitives::types::transactions::TransactionInfo::Transaction(tx)).await } network_client_response=> { @@ -1165,10 +1158,8 @@ impl JsonRpcHandler { actix::spawn( self.client_addr .send( - near_client::adapter::NetworkClientMessages::Adversarial( - near_network::types::NetworkAdversarialMessage::AdvSetSyncInfo(height), - ) - .with_span_context(), + near_network::types::NetworkAdversarialMessage::AdvSetSyncInfo(height) + .with_span_context(), ) .map(|_| ()), ); @@ -1179,20 +1170,16 @@ impl JsonRpcHandler { actix::spawn( self.client_addr .send( - near_client::adapter::NetworkClientMessages::Adversarial( - near_network::types::NetworkAdversarialMessage::AdvDisableHeaderSync, - ) - .with_span_context(), + near_network::types::NetworkAdversarialMessage::AdvDisableHeaderSync + .with_span_context(), ) .map(|_| ()), ); actix::spawn( self.view_client_addr .send( - near_client::adapter::NetworkViewClientMessages::Adversarial( - near_network::types::NetworkAdversarialMessage::AdvDisableHeaderSync, - ) - .with_span_context(), + near_network::types::NetworkAdversarialMessage::AdvDisableHeaderSync + .with_span_context(), ) .map(|_| ()), ); @@ -1203,20 +1190,16 @@ impl JsonRpcHandler { actix::spawn( self.client_addr .send( - NetworkClientMessages::Adversarial( - near_network::types::NetworkAdversarialMessage::AdvDisableDoomslug, - ) - .with_span_context(), + near_network::types::NetworkAdversarialMessage::AdvDisableDoomslug + .with_span_context(), ) .map(|_| ()), ); actix::spawn( self.view_client_addr .send( - near_client::adapter::NetworkViewClientMessages::Adversarial( - near_network::types::NetworkAdversarialMessage::AdvDisableDoomslug, - ) - .with_span_context(), + near_network::types::NetworkAdversarialMessage::AdvDisableDoomslug + .with_span_context(), ) .map(|_| ()), ); @@ -1228,10 +1211,8 @@ impl 
JsonRpcHandler { actix::spawn( self.client_addr .send( - NetworkClientMessages::Adversarial( - near_network::types::NetworkAdversarialMessage::AdvProduceBlocks( - num_blocks, only_valid, - ), + near_network::types::NetworkAdversarialMessage::AdvProduceBlocks( + num_blocks, only_valid, ) .with_span_context(), ) @@ -1245,20 +1226,16 @@ impl JsonRpcHandler { actix::spawn( self.client_addr .send( - NetworkClientMessages::Adversarial( - near_network::types::NetworkAdversarialMessage::AdvSwitchToHeight(height), - ) - .with_span_context(), + near_network::types::NetworkAdversarialMessage::AdvSwitchToHeight(height) + .with_span_context(), ) .map(|_| ()), ); actix::spawn( self.view_client_addr .send( - near_client::adapter::NetworkViewClientMessages::Adversarial( - near_network::types::NetworkAdversarialMessage::AdvSwitchToHeight(height), - ) - .with_span_context(), + near_network::types::NetworkAdversarialMessage::AdvSwitchToHeight(height) + .with_span_context(), ) .map(|_| ()), ); @@ -1269,16 +1246,14 @@ impl JsonRpcHandler { match self .client_addr .send( - NetworkClientMessages::Adversarial( - near_network::types::NetworkAdversarialMessage::AdvGetSavedBlocks, - ) - .with_span_context(), + near_network::types::NetworkAdversarialMessage::AdvGetSavedBlocks + .with_span_context(), ) .await { Ok(result) => match result { - NetworkClientResponses::AdvResult(value) => serialize_response(value), - _ => Err(RpcError::server_error::(None)), + Some(value) => serialize_response(value), + None => Err(RpcError::server_error::(None)), }, _ => Err(RpcError::server_error::(None)), } @@ -1288,16 +1263,14 @@ impl JsonRpcHandler { match self .client_addr .send( - NetworkClientMessages::Adversarial( - near_network::types::NetworkAdversarialMessage::AdvCheckStorageConsistency, - ) - .with_span_context(), + near_network::types::NetworkAdversarialMessage::AdvCheckStorageConsistency + .with_span_context(), ) .await { Ok(result) => match result { - NetworkClientResponses::AdvResult(value) => 
serialize_response(value), - _ => Err(RpcError::server_error::(None)), + Some(value) => serialize_response(value), + None => Err(RpcError::server_error::(None)), }, _ => Err(RpcError::server_error::(None)), } diff --git a/chain/rosetta-rpc/src/lib.rs b/chain/rosetta-rpc/src/lib.rs index f7dd91bf7a9..d0e03915069 100644 --- a/chain/rosetta-rpc/src/lib.rs +++ b/chain/rosetta-rpc/src/lib.rs @@ -741,7 +741,7 @@ async fn construction_submit( let transaction_hash = signed_transaction.as_ref().get_hash(); let transaction_submittion = client_addr .send( - near_client::adapter::NetworkClientMessages::Transaction { + near_client::ProcessTxRequest { transaction: signed_transaction.into_inner(), is_forwarded: false, check_only: false, @@ -750,15 +750,14 @@ async fn construction_submit( ) .await?; match transaction_submittion { - near_client::adapter::NetworkClientResponses::ValidTx - | near_client::adapter::NetworkClientResponses::RequestRouted => { + near_client::ProcessTxResponse::ValidTx | near_client::ProcessTxResponse::RequestRouted => { Ok(Json(models::TransactionIdentifierResponse { transaction_identifier: models::TransactionIdentifier::transaction( &transaction_hash, ), })) } - near_client::adapter::NetworkClientResponses::InvalidTx(error) => { + near_client::ProcessTxResponse::InvalidTx(error) => { Err(errors::ErrorKind::InvalidInput(error.to_string()).into()) } _ => Err(errors::ErrorKind::InternalInvariantError(format!( diff --git a/integration-tests/src/tests/client/chunks_management.rs b/integration-tests/src/tests/client/chunks_management.rs index 905ab84464b..f86a5fc397f 100644 --- a/integration-tests/src/tests/client/chunks_management.rs +++ b/integration-tests/src/tests/client/chunks_management.rs @@ -7,9 +7,8 @@ use near_chunks::{ CHUNK_REQUEST_RETRY_MS, CHUNK_REQUEST_SWITCH_TO_FULL_FETCH_MS, CHUNK_REQUEST_SWITCH_TO_OTHERS_MS, }; -use near_client::adapter::NetworkClientMessages; use near_client::test_utils::setup_mock_all_validators; -use 
near_client::{ClientActor, GetBlock, ViewClientActor}; +use near_client::{ClientActor, GetBlock, ProcessTxRequest, ViewClientActor}; use near_network::types::PeerManagerMessageRequest; use near_network::types::{AccountIdOrPeerTrackingShard, PeerInfo}; use near_network::types::{NetworkRequests, NetworkResponses}; @@ -253,7 +252,7 @@ impl Test { let block_hash = res.unwrap().unwrap().header.hash; let connectors_ = connectors.write().unwrap(); connectors_[0].0.do_send( - NetworkClientMessages::Transaction { + ProcessTxRequest { transaction: SignedTransaction::empty(block_hash), is_forwarded: false, check_only: false, @@ -261,7 +260,7 @@ impl Test { .with_span_context(), ); connectors_[1].0.do_send( - NetworkClientMessages::Transaction { + ProcessTxRequest { transaction: SignedTransaction::empty(block_hash), is_forwarded: false, check_only: false, @@ -269,7 +268,7 @@ impl Test { .with_span_context(), ); connectors_[2].0.do_send( - NetworkClientMessages::Transaction { + ProcessTxRequest { transaction: SignedTransaction::empty(block_hash), is_forwarded: false, check_only: false, diff --git a/integration-tests/src/tests/client/features/access_key_nonce_for_implicit_accounts.rs b/integration-tests/src/tests/client/features/access_key_nonce_for_implicit_accounts.rs index 69ae0899968..d2fece52fb5 100644 --- a/integration-tests/src/tests/client/features/access_key_nonce_for_implicit_accounts.rs +++ b/integration-tests/src/tests/client/features/access_key_nonce_for_implicit_accounts.rs @@ -5,8 +5,8 @@ use assert_matches::assert_matches; use near_chain::chain::NUM_ORPHAN_ANCESTORS_CHECK; use near_chain::{ChainGenesis, Error, Provenance, RuntimeAdapter}; use near_chain_configs::Genesis; -use near_client::adapter::NetworkClientResponses; use near_client::test_utils::{create_chunk_with_transactions, TestEnv}; +use near_client::ProcessTxResponse; use near_crypto::{InMemorySigner, KeyType, Signer}; use near_network::types::{MsgRecipient, NetworkRequests, PeerManagerMessageRequest}; 
use near_o11y::testonly::init_test_logger; @@ -93,13 +93,13 @@ fn test_transaction_hash_collision() { *genesis_block.hash(), ); let res = env.clients[0].process_tx(create_account_tx, false, false); - assert_matches!(res, NetworkClientResponses::ValidTx); + assert_matches!(res, ProcessTxResponse::ValidTx); for i in 4..8 { env.produce_block(0, i); } let res = env.clients[0].process_tx(send_money_tx, false, false); - assert_matches!(res, NetworkClientResponses::InvalidTx(_)); + assert_matches!(res, ProcessTxResponse::InvalidTx(_)); } /// Helper for checking that duplicate transactions from implicit accounts are properly rejected. @@ -108,7 +108,7 @@ fn test_transaction_hash_collision() { /// should fail since the protocol upgrade. fn get_status_of_tx_hash_collision_for_implicit_account( protocol_version: ProtocolVersion, -) -> NetworkClientResponses { +) -> ProcessTxResponse { let epoch_length = 100; let mut genesis = Genesis::test(vec!["test0".parse().unwrap(), "test1".parse().unwrap()], 1); genesis.config.epoch_length = epoch_length; @@ -197,7 +197,7 @@ fn test_transaction_hash_collision_for_implicit_account_fail() { let protocol_version = ProtocolFeature::AccessKeyNonceForImplicitAccounts.protocol_version(); assert_matches!( get_status_of_tx_hash_collision_for_implicit_account(protocol_version), - NetworkClientResponses::InvalidTx(InvalidTxError::InvalidNonce { .. }) + ProcessTxResponse::InvalidTx(InvalidTxError::InvalidNonce { .. 
}) ); } @@ -208,7 +208,7 @@ fn test_transaction_hash_collision_for_implicit_account_ok() { ProtocolFeature::AccessKeyNonceForImplicitAccounts.protocol_version() - 1; assert_matches!( get_status_of_tx_hash_collision_for_implicit_account(protocol_version), - NetworkClientResponses::ValidTx + ProcessTxResponse::ValidTx ); } @@ -263,10 +263,7 @@ fn test_transaction_nonce_too_large() { *genesis_block.hash(), ); let res = env.clients[0].process_tx(tx, false, false); - assert_matches!( - res, - NetworkClientResponses::InvalidTx(InvalidTxError::InvalidAccessKeyError(_)) - ); + assert_matches!(res, ProcessTxResponse::InvalidTx(InvalidTxError::InvalidAccessKeyError(_))); } /// This test tests the logic regarding requesting chunks for orphan. diff --git a/integration-tests/src/tests/client/features/account_id_in_function_call_permission.rs b/integration-tests/src/tests/client/features/account_id_in_function_call_permission.rs index 1ff9fcaee4d..7af8b9546fa 100644 --- a/integration-tests/src/tests/client/features/account_id_in_function_call_permission.rs +++ b/integration-tests/src/tests/client/features/account_id_in_function_call_permission.rs @@ -1,7 +1,7 @@ use near_chain::{ChainGenesis, RuntimeAdapter}; use near_chain_configs::Genesis; -use near_client::adapter::NetworkClientResponses; use near_client::test_utils::TestEnv; +use near_client::ProcessTxResponse; use near_crypto::{InMemorySigner, KeyType, Signer}; use near_primitives::account::{AccessKey, AccessKeyPermission, FunctionCallPermission}; use near_primitives::errors::{ActionsValidationError, InvalidTxError}; @@ -69,7 +69,7 @@ fn test_account_id_in_function_call_permission_upgrade() { let signed_transaction = Transaction { nonce: 10, block_hash: tip.last_block_hash, ..tx.clone() }.sign(&signer); let res = env.clients[0].process_tx(signed_transaction, false, false); - assert_eq!(res, NetworkClientResponses::ValidTx); + assert_eq!(res, ProcessTxResponse::ValidTx); for i in 0..3 { env.produce_block(0, tip.height + i + 
1); } @@ -85,7 +85,7 @@ fn test_account_id_in_function_call_permission_upgrade() { let res = env.clients[0].process_tx(signed_transaction, false, false); assert_eq!( res, - NetworkClientResponses::InvalidTx(InvalidTxError::ActionsValidation( + ProcessTxResponse::InvalidTx(InvalidTxError::ActionsValidation( ActionsValidationError::InvalidAccountId { account_id: "#".to_string() } )) ) @@ -135,7 +135,7 @@ fn test_very_long_account_id() { let res = env.clients[0].process_tx(tx, false, false); assert_eq!( res, - NetworkClientResponses::InvalidTx(InvalidTxError::ActionsValidation( + ProcessTxResponse::InvalidTx(InvalidTxError::ActionsValidation( ActionsValidationError::InvalidAccountId { account_id: "A".repeat(128) } )) ) diff --git a/integration-tests/src/tests/client/process_blocks.rs b/integration-tests/src/tests/client/process_blocks.rs index 4d281abfb95..32d33410b0f 100644 --- a/integration-tests/src/tests/client/process_blocks.rs +++ b/integration-tests/src/tests/client/process_blocks.rs @@ -21,11 +21,13 @@ use near_chain::{ }; use near_chain_configs::{ClientConfig, Genesis, DEFAULT_GC_NUM_EPOCHS_TO_KEEP}; use near_chunks::{ChunkStatus, ShardsManager}; -use near_client::adapter::{NetworkClientMessages, NetworkClientResponses}; use near_client::test_utils::{ create_chunk_on_height, setup_client, setup_mock, setup_mock_all_validators, TestEnv, }; -use near_client::{Client, GetBlock, GetBlockWithMerkleTree}; +use near_client::{ + BlockApproval, BlockResponse, Client, GetBlock, GetBlockWithMerkleTree, ProcessTxRequest, + ProcessTxResponse, SetNetworkInfo, +}; use near_crypto::{InMemorySigner, KeyType, PublicKey, Signature, Signer}; use near_network::test_utils::{wait_or_panic, MockPeerManagerAdapter}; use near_network::types::{ @@ -325,7 +327,7 @@ fn produce_blocks_with_tx() { let actor = actor.then(move |res| { let block_hash = res.unwrap().unwrap().header.hash; client.do_send( - NetworkClientMessages::Transaction { + ProcessTxRequest { transaction: 
SignedTransaction::empty(block_hash), is_forwarded: false, check_only: false, @@ -402,7 +404,7 @@ fn receive_network_block() { None, ); client.do_send( - NetworkClientMessages::Block(block, PeerInfo::random().id, false) + BlockResponse { block, peer_id: PeerInfo::random().id, was_requested: false } .with_span_context(), ); future::ready(()) @@ -485,8 +487,12 @@ fn produce_block_with_approvals() { None, ); client.do_send( - NetworkClientMessages::Block(block.clone(), PeerInfo::random().id, false) - .with_span_context(), + BlockResponse { + block: block.clone(), + peer_id: PeerInfo::random().id, + was_requested: false, + } + .with_span_context(), ); for i in 3..11 { @@ -504,10 +510,7 @@ fn produce_block_with_approvals() { 10, // the height at which "test1" is producing &signer, ); - client.do_send( - NetworkClientMessages::BlockApproval(approval, PeerInfo::random().id) - .with_span_context(), - ); + client.do_send(BlockApproval(approval, PeerInfo::random().id).with_span_context()); } future::ready(()) @@ -558,11 +561,11 @@ fn produce_block_with_approvals_arrived_early() { for (i, (client, _)) in conns.iter().enumerate() { if i > 0 { client.do_send( - NetworkClientMessages::Block( - block.clone(), - PeerInfo::random().id, - false, - ) + BlockResponse { + block: block.clone(), + peer_id: PeerInfo::random().id, + was_requested: false, + } .with_span_context(), ) } @@ -583,11 +586,11 @@ fn produce_block_with_approvals_arrived_early() { if approval_counter == 3 { let block = block_holder.read().unwrap().clone().unwrap(); conns[0].0.do_send( - NetworkClientMessages::Block( - block, - PeerInfo::random().id, - false, - ) + BlockResponse { + block: block, + peer_id: PeerInfo::random().id, + was_requested: false, + } .with_span_context(), ); } @@ -701,8 +704,12 @@ fn invalid_blocks_common(is_requested: bool) { block.mut_header().get_mut().inner_rest.chunk_mask = vec![]; block.mut_header().get_mut().init(); client.do_send( - NetworkClientMessages::Block(block.clone(), 
PeerInfo::random().id, is_requested) - .with_span_context(), + BlockResponse { + block: block.clone(), + peer_id: PeerInfo::random().id, + was_requested: is_requested, + } + .with_span_context(), ); // Send blocks with invalid protocol version @@ -713,11 +720,11 @@ fn invalid_blocks_common(is_requested: bool) { PROTOCOL_VERSION - 1; block.mut_header().get_mut().init(); client.do_send( - NetworkClientMessages::Block( - block.clone(), - PeerInfo::random().id, - is_requested, - ) + BlockResponse { + block: block.clone(), + peer_id: PeerInfo::random().id, + was_requested: is_requested, + } .with_span_context(), ); } @@ -739,26 +746,34 @@ fn invalid_blocks_common(is_requested: bool) { }; block.set_chunks(chunks); client.do_send( - NetworkClientMessages::Block(block.clone(), PeerInfo::random().id, is_requested) - .with_span_context(), + BlockResponse { + block: block.clone(), + peer_id: PeerInfo::random().id, + was_requested: is_requested, + } + .with_span_context(), ); // Send proper block. let block2 = valid_block; client.do_send( - NetworkClientMessages::Block(block2.clone(), PeerInfo::random().id, is_requested) - .with_span_context(), + BlockResponse { + block: block2.clone(), + peer_id: PeerInfo::random().id, + was_requested: is_requested, + } + .with_span_context(), ); if is_requested { let mut block3 = block2; block3.mut_header().get_mut().inner_rest.chunk_headers_root = hash(&[1]); block3.mut_header().get_mut().init(); client.do_send( - NetworkClientMessages::Block( - block3.clone(), - PeerInfo::random().id, - is_requested, - ) + BlockResponse { + block: block3.clone(), + peer_id: PeerInfo::random().id, + was_requested: is_requested, + } .with_span_context(), ); } @@ -871,11 +886,11 @@ fn ban_peer_for_invalid_block_common(mode: InvalidBlockMode) { for (i, (client, _)) in conns.clone().into_iter().enumerate() { if i != block_producer_idx { client.do_send( - NetworkClientMessages::Block( - block_mut.clone(), - PeerInfo::random().id, - false, - ) + BlockResponse { + 
block: block_mut.clone(), + peer_id: PeerInfo::random().id, + was_requested: false, + } .with_span_context(), ) } @@ -1010,7 +1025,7 @@ fn client_sync_headers() { }), ); client.do_send( - NetworkClientMessages::NetworkInfo(NetworkInfo { + SetNetworkInfo(NetworkInfo { connected_peers: vec![ConnectedPeerInfo::from(&FullPeerInfo { peer_info: peer_info2.clone(), chain_info: PeerChainInfoV2 { @@ -1070,7 +1085,7 @@ fn test_process_invalid_tx() { } assert_eq!( env.clients[0].process_tx(tx, false, false), - NetworkClientResponses::InvalidTx(InvalidTxError::Expired) + ProcessTxResponse::InvalidTx(InvalidTxError::Expired) ); let tx2 = SignedTransaction::new( Signature::empty(KeyType::ED25519), @@ -1085,7 +1100,7 @@ fn test_process_invalid_tx() { ); assert_eq!( env.clients[0].process_tx(tx2, false, false), - NetworkClientResponses::InvalidTx(InvalidTxError::Expired) + ProcessTxResponse::InvalidTx(InvalidTxError::Expired) ); } diff --git a/integration-tests/src/tests/nearcore/stake_nodes.rs b/integration-tests/src/tests/nearcore/stake_nodes.rs index 63aab2f56a3..6da9ea32a9a 100644 --- a/integration-tests/src/tests/nearcore/stake_nodes.rs +++ b/integration-tests/src/tests/nearcore/stake_nodes.rs @@ -11,8 +11,7 @@ use crate::genesis_helpers::genesis_hash; use crate::test_helpers::heavy_test; use near_actix_test_utils::run_actix; use near_chain_configs::Genesis; -use near_client::adapter::NetworkClientMessages; -use near_client::{ClientActor, GetBlock, Query, Status, ViewClientActor}; +use near_client::{ClientActor, GetBlock, ProcessTxRequest, Query, Status, ViewClientActor}; use near_crypto::{InMemorySigner, KeyType}; use near_network::test_utils::{convert_boot_nodes, open_port, WaitOrTimeoutActor}; use near_o11y::testonly::init_integration_logger; @@ -126,7 +125,7 @@ fn test_stake_nodes() { test_nodes[0] .client .send( - NetworkClientMessages::Transaction { + ProcessTxRequest { transaction: tx, is_forwarded: false, check_only: false, @@ -221,7 +220,7 @@ fn 
test_validator_kickout() { test_node .client .send( - NetworkClientMessages::Transaction { + ProcessTxRequest { transaction: stake_transaction, is_forwarded: false, check_only: false, @@ -379,7 +378,7 @@ fn test_validator_join() { test_nodes[1] .client .send( - NetworkClientMessages::Transaction { + ProcessTxRequest { transaction: unstake_transaction, is_forwarded: false, check_only: false, @@ -392,7 +391,7 @@ fn test_validator_join() { test_nodes[0] .client .send( - NetworkClientMessages::Transaction { + ProcessTxRequest { transaction: stake_transaction, is_forwarded: false, check_only: false, diff --git a/integration-tests/src/tests/nearcore/sync_nodes.rs b/integration-tests/src/tests/nearcore/sync_nodes.rs index b04d3b0e0a3..e0e12801a7b 100644 --- a/integration-tests/src/tests/nearcore/sync_nodes.rs +++ b/integration-tests/src/tests/nearcore/sync_nodes.rs @@ -10,8 +10,7 @@ use crate::test_helpers::heavy_test; use near_actix_test_utils::run_actix; use near_chain::Block; use near_chain_configs::Genesis; -use near_client::adapter::NetworkClientMessages; -use near_client::{ClientActor, GetBlock}; +use near_client::{BlockResponse, ClientActor, GetBlock, ProcessTxRequest}; use near_crypto::{InMemorySigner, KeyType}; use near_network::test_utils::{convert_boot_nodes, open_port, WaitOrTimeoutActor}; use near_network::types::PeerInfo; @@ -89,8 +88,12 @@ fn add_blocks( ); block_merkle_tree.insert(*block.hash()); let _ = client.do_send( - NetworkClientMessages::Block(block.clone(), PeerInfo::random().id, false) - .with_span_context(), + BlockResponse { + block: block.clone(), + peer_id: PeerInfo::random().id, + was_requested: false, + } + .with_span_context(), ); blocks.push(block); prev = &blocks[blocks.len() - 1]; @@ -277,7 +280,7 @@ fn sync_state_stake_change() { actix::spawn( client1 .send( - NetworkClientMessages::Transaction { + ProcessTxRequest { transaction: unstake_transaction, is_forwarded: false, check_only: false, diff --git a/tools/mirror/src/lib.rs 
b/tools/mirror/src/lib.rs index 34aa5817125..932749f32ad 100644 --- a/tools/mirror/src/lib.rs +++ b/tools/mirror/src/lib.rs @@ -2,8 +2,8 @@ use actix::Addr; use anyhow::Context; use borsh::{BorshDeserialize, BorshSerialize}; use near_chain_configs::GenesisValidationMode; -use near_client::adapter::{NetworkClientMessages, NetworkClientResponses}; use near_client::{ClientActor, ViewClientActor}; +use near_client::{ProcessTxRequest, ProcessTxResponse}; use near_client_primitives::types::{ GetBlock, GetBlockError, GetChunk, GetChunkError, GetExecutionOutcome, GetExecutionOutcomeError, GetExecutionOutcomeResponse, Query, QueryError, @@ -492,7 +492,7 @@ impl TxMirror { match self .target_client .send( - NetworkClientMessages::Transaction { + ProcessTxRequest { transaction: tx.target_tx.clone(), is_forwarded: false, check_only: false, @@ -501,11 +501,11 @@ impl TxMirror { ) .await? { - NetworkClientResponses::RequestRouted => { + ProcessTxResponse::RequestRouted => { crate::metrics::TRANSACTIONS_SENT.with_label_values(&["ok"]).inc(); txs.push(tx); } - NetworkClientResponses::InvalidTx(e) => { + ProcessTxResponse::InvalidTx(e) => { // TODO: here if we're getting an error because the tx was already included, it is possible // that some other instance of this code ran and made progress already. For now we can assume // only once instance of this code will run, but this is the place to detect if that's not the case. diff --git a/tools/mock-node/src/lib.rs b/tools/mock-node/src/lib.rs index b75106cec74..e2687630faf 100644 --- a/tools/mock-node/src/lib.rs +++ b/tools/mock-node/src/lib.rs @@ -1,11 +1,10 @@ //! Implements `ChainHistoryAccess` and `MockPeerManagerActor`, which is the main //! components of the mock network. 
-use actix::{Actor, Context, Handler, Recipient}; +use actix::{Actor, Context, Handler}; use anyhow::{anyhow, Context as AnyhowContext}; use near_chain::{Block, BlockHeader, Chain, ChainStoreAccess, Error}; use near_chain_configs::GenesisConfig; -use near_client::adapter::NetworkClientMessages; use near_client::sync; use near_network::types::{ FullPeerInfo, NetworkInfo, NetworkRequests, NetworkResponses, PeerManagerMessageRequest, @@ -14,7 +13,7 @@ use near_network::types::{ use near_network::types::{ PartialEdgeInfo, PartialEncodedChunkRequestMsg, PartialEncodedChunkResponseMsg, PeerInfo, }; -use near_o11y::{handler_debug_span, OpenTelemetrySpanExt, WithSpanContext, WithSpanContextExt}; +use near_o11y::{handler_debug_span, OpenTelemetrySpanExt, WithSpanContext}; use near_performance_metrics::actix::run_later; use near_primitives::block::GenesisId; use near_primitives::hash::CryptoHash; @@ -24,6 +23,7 @@ use near_primitives::types::{BlockHeight, ShardId}; use serde::Deserialize; use std::collections::{HashMap, HashSet}; use std::path::Path; +use std::sync::Arc; use std::time::Duration; pub mod setup; @@ -194,7 +194,7 @@ impl IncomingRequests { /// - Simulates block production and sends the most "recent" block to ClientActor pub struct MockPeerManagerActor { /// Client address for the node that we are testing - client_addr: Recipient>, + client: Arc, /// Access a pre-generated chain history from storage chain_history_access: ChainHistoryAccess, /// Current network state for the simulated network @@ -210,7 +210,7 @@ pub struct MockPeerManagerActor { impl MockPeerManagerActor { fn new( - client_addr: Recipient>, + client: Arc, genesis_config: &GenesisConfig, mut chain: Chain, client_start_height: BlockHeight, @@ -251,7 +251,7 @@ impl MockPeerManagerActor { target_height, ); Self { - client_addr, + client, chain_history_access: ChainHistoryAccess { chain, target_height }, network_info, block_production_delay, @@ -265,9 +265,11 @@ impl MockPeerManagerActor { /// When it 
is called, it increments peer heights by 1 and sends the block at that height /// to ClientActor. In a way, it simulates peers that broadcast new blocks fn update_peers(&mut self, ctx: &mut Context) { - let _response = self.client_addr.do_send( - NetworkClientMessages::NetworkInfo(self.network_info.clone()).with_span_context(), - ); + actix::spawn({ + let client = self.client.clone(); + let info = self.network_info.clone(); + async move { client.network_info(info).await } + }); for connected_peer in self.network_info.connected_peers.iter_mut() { let peer = &mut connected_peer.full_peer_info; let current_height = peer.chain_info.height; @@ -275,10 +277,11 @@ impl MockPeerManagerActor { if let Ok(block) = self.chain_history_access.retrieve_block_by_height(current_height) { - let _response = self.client_addr.do_send( - NetworkClientMessages::Block(block, peer.peer_info.id.clone(), false) - .with_span_context(), - ); + actix::spawn({ + let client = self.client.clone(); + let peer_id = peer.peer_info.id.clone(); + async move { client.block(block, peer_id, false).await } + }); } peer.chain_info.height = current_height + 1; } @@ -296,14 +299,13 @@ impl MockPeerManagerActor { fn send_unrequested_block(&mut self, ctx: &mut Context) { if let Some((interval, block)) = &self.incoming_requests.block { - let _response = self.client_addr.do_send( - NetworkClientMessages::Block( - block.clone(), - self.network_info.connected_peers[0].full_peer_info.peer_info.id.clone(), - false, - ) - .with_span_context(), - ); + actix::spawn({ + let client = self.client.clone(); + let block = block.clone(); + let peer_id = + self.network_info.connected_peers[0].full_peer_info.peer_info.id.clone(); + async move { client.block(block, peer_id, false).await } + }); run_later(ctx, *interval, move |act, ctx| { act.send_unrequested_block(ctx); @@ -313,16 +315,21 @@ impl MockPeerManagerActor { fn send_chunk_request(&mut self, ctx: &mut Context) { if let Some((interval, request)) = 
&self.incoming_requests.chunk_request { - let _response = self.client_addr.do_send( - NetworkClientMessages::PartialEncodedChunkRequest( - request.clone(), - // this can just be nonsense since the PeerManager is mocked out anyway. If/when we update the mock node - // to exercise the PeerManager code as well, then this won't matter anyway since the mock code won't be - // responsible for it. - CryptoHash::default(), - ) - .with_span_context(), - ); + actix::spawn({ + let client = self.client.clone(); + let request = request.clone(); + async move { + client + .partial_encoded_chunk_request( + request.clone(), + // this can just be nonsense since the PeerManager is mocked out anyway. If/when we update the mock node + // to exercise the PeerManager code as well, then this won't matter anyway since the mock code won't be + // responsible for it. + CryptoHash::default(), + ) + .await + } + }); run_later(ctx, *interval, move |act, ctx| { act.send_chunk_request(ctx); @@ -366,9 +373,10 @@ impl Handler> for MockPeerManagerActo NetworkRequests::BlockRequest { hash, peer_id } => { run_later(ctx, self.network_delay, move |act, _ctx| { let block = act.chain_history_access.retrieve_block(&hash).unwrap(); - let _response = act.client_addr.do_send( - NetworkClientMessages::Block(block, peer_id, true).with_span_context(), - ); + actix::spawn({ + let client = act.client.clone(); + async move { client.block(block, peer_id, true).await } + }); }); } NetworkRequests::BlockHeadersRequest { hashes, peer_id } => { @@ -377,10 +385,10 @@ impl Handler> for MockPeerManagerActo .chain_history_access .retrieve_block_headers(hashes.clone()) .unwrap(); - let _response = act.client_addr.do_send( - NetworkClientMessages::BlockHeaders(headers, peer_id) - .with_span_context(), - ); + actix::spawn({ + let client = act.client.clone(); + async move { client.block_headers(headers, peer_id).await } + }); }); } NetworkRequests::PartialEncodedChunkRequest { request, .. 
} => { @@ -389,13 +397,17 @@ impl Handler> for MockPeerManagerActo .chain_history_access .retrieve_partial_encoded_chunk(&request) .unwrap(); - let _response = act.client_addr.do_send( - NetworkClientMessages::PartialEncodedChunkResponse( - response, - Clock::instant(), - ) - .with_span_context(), - ); + actix::spawn({ + let client = act.client.clone(); + async move { + client + .partial_encoded_chunk_response( + response, + Clock::instant().into(), + ) + .await + } + }); }); } NetworkRequests::PartialEncodedChunkResponse { .. } => {} diff --git a/tools/mock-node/src/setup.rs b/tools/mock-node/src/setup.rs index ddceb207a20..b2e31af10e6 100644 --- a/tools/mock-node/src/setup.rs +++ b/tools/mock-node/src/setup.rs @@ -1,18 +1,16 @@ //! Provides functions for setting up a mock network from configs and home dirs. use crate::{MockNetworkConfig, MockPeerManagerActor}; -use actix::{Actor, Addr, Arbiter, Recipient}; +use actix::{Actor, Addr, Arbiter}; use anyhow::Context; use near_chain::ChainStoreUpdate; use near_chain::{ Chain, ChainGenesis, ChainStore, ChainStoreAccess, DoomslugThresholdMode, RuntimeAdapter, }; use near_chain_configs::GenesisConfig; -use near_client::adapter::NetworkClientMessages; use near_client::{start_client, start_view_client, ClientActor, ViewClientActor}; use near_epoch_manager::{EpochManager, EpochManagerAdapter}; use near_network::types::NetworkRecipient; -use near_o11y::WithSpanContext; use near_primitives::state_part::PartId; use near_primitives::syncing::get_num_state_parts; use near_primitives::types::BlockHeight; @@ -45,7 +43,7 @@ fn setup_runtime( fn setup_mock_peer_manager_actor( chain: Chain, - client_addr: Recipient>, + client: Arc, genesis_config: &GenesisConfig, block_production_delay: Duration, client_start_height: BlockHeight, @@ -59,7 +57,7 @@ fn setup_mock_peer_manager_actor( Some(it) => it, }; MockPeerManagerActor::new( - client_addr, + client, genesis_config, chain, client_start_height, @@ -261,6 +259,7 @@ pub fn 
setup_mock_node( let arbiter = Arbiter::new(); let client1 = client.clone(); + let view_client1 = view_client.clone(); let genesis_config = config.genesis.config.clone(); let archival = config.client_config.archive; let network_config = network_config.clone(); @@ -279,7 +278,7 @@ pub fn setup_mock_node( MockPeerManagerActor::start_in_arbiter(&arbiter.handle(), move |_ctx| { setup_mock_peer_manager_actor( chain, - client1.recipient(), + Arc::new(near_client::adapter::Adapter::new(client1, view_client1)), &genesis_config, block_production_delay, client_start_height, @@ -311,8 +310,7 @@ mod tests { use futures::{future, FutureExt}; use near_actix_test_utils::{run_actix, spawn_interruptible}; use near_chain_configs::Genesis; - use near_client::adapter::NetworkClientMessages; - use near_client::GetBlock; + use near_client::{GetBlock, ProcessTxRequest}; use near_crypto::{InMemorySigner, KeyType}; use near_network::test_utils::{open_port, WaitOrTimeoutActor}; use near_o11y::testonly::init_integration_logger; @@ -385,7 +383,7 @@ mod tests { spawn_interruptible( client1 .send( - NetworkClientMessages::Transaction { + ProcessTxRequest { transaction, is_forwarded: false, check_only: false, diff --git a/tools/state-viewer/src/apply_chunk.rs b/tools/state-viewer/src/apply_chunk.rs index 53160c076a0..99a3ff783af 100644 --- a/tools/state-viewer/src/apply_chunk.rs +++ b/tools/state-viewer/src/apply_chunk.rs @@ -407,8 +407,8 @@ pub(crate) fn apply_receipt( mod test { use near_chain::{ChainGenesis, ChainStore, ChainStoreAccess, Provenance}; use near_chain_configs::Genesis; - use near_client::adapter::NetworkClientResponses; use near_client::test_utils::TestEnv; + use near_client::ProcessTxResponse; use near_crypto::{InMemorySigner, KeyType}; use near_epoch_manager::EpochManagerAdapter; use near_primitives::hash::CryptoHash; @@ -438,7 +438,7 @@ mod test { hash, ); let response = env.clients[0].process_tx(tx, false, false); - assert_eq!(response, NetworkClientResponses::ValidTx); + 
assert_eq!(response, ProcessTxResponse::ValidTx); } } From ec1db8ba5284d4331247d4b0b64022afe5807c0d Mon Sep 17 00:00:00 2001 From: posvyatokum Date: Wed, 26 Oct 2022 18:56:06 +0100 Subject: [PATCH 034/103] store: adding State to cold columns (#7926) --- core/store/src/cold_storage.rs | 61 +++++++++++++++++-- core/store/src/columns.rs | 2 +- core/store/src/metrics.rs | 9 +++ core/store/src/trie/mod.rs | 14 +++++ .../src/tests/client/cold_storage.rs | 33 ++++++++-- 5 files changed, 108 insertions(+), 11 deletions(-) diff --git a/core/store/src/cold_storage.rs b/core/store/src/cold_storage.rs index 23e311946cb..52b341096a9 100644 --- a/core/store/src/cold_storage.rs +++ b/core/store/src/cold_storage.rs @@ -1,8 +1,10 @@ use crate::columns::DBKeyType; use crate::refcount::add_positive_refcount; -use crate::{DBCol, DBTransaction, Database, Store}; +use crate::trie::TrieRefcountChange; +use crate::{DBCol, DBTransaction, Database, Store, TrieChanges}; use borsh::BorshDeserialize; +use near_primitives::shard_layout::ShardLayout; use near_primitives::types::BlockHeight; use std::collections::HashMap; use std::io; @@ -38,13 +40,14 @@ struct StoreWithCache<'a> { pub fn update_cold_db( cold_db: &dyn Database, hot_store: &Store, + shard_layout: &ShardLayout, height: &BlockHeight, ) -> io::Result<()> { let _span = tracing::debug_span!(target: "store", "update cold db", height = height); let mut store_with_cache = StoreWithCache { store: hot_store, cache: StoreCache::new() }; - let key_type_to_keys = get_keys_from_store(&mut store_with_cache, height)?; + let key_type_to_keys = get_keys_from_store(&mut store_with_cache, shard_layout, height)?; for col in DBCol::iter() { if col.is_cold() { copy_from_store( @@ -116,6 +119,10 @@ pub fn test_cold_genesis_update(cold_db: &dyn Database, hot_store: &Store) -> io Ok(()) } +pub fn test_get_store_reads(column: DBCol) -> u64 { + crate::metrics::COLD_MIGRATION_READS.with_label_values(&[<&str>::from(column)]).get() +} + /// Returns HashMap from 
DBKeyType to possible keys of that type for provided height. /// Only constructs keys for key types that are used in cold columns. /// The goal is to capture all changes to db made during production of the block at provided height. @@ -124,6 +131,7 @@ pub fn test_cold_genesis_update(cold_db: &dyn Database, hot_store: &Store) -> io /// But for TransactionHash, for example, it is all of the tx hashes in that block. fn get_keys_from_store( store: &mut StoreWithCache, + shard_layout: &ShardLayout, height: &BlockHeight, ) -> io::Result>> { let mut key_type_to_keys = HashMap::new(); @@ -136,6 +144,35 @@ fn get_keys_from_store( key_type, match key_type { DBKeyType::BlockHash => vec![block_hash_key.clone()], + DBKeyType::ShardUId => shard_layout + .get_shard_uids() + .iter() + .map(|uid| uid.to_bytes().to_vec()) + .collect(), + // TODO: don't write values of State column to cache. Write them directly to colddb. + DBKeyType::TrieNodeOrValueHash => { + let mut keys = vec![]; + for shard_uid in shard_layout.get_shard_uids() { + let shard_uid_key = shard_uid.to_bytes(); + + debug_assert_eq!( + DBCol::TrieChanges.key_type(), + &[DBKeyType::BlockHash, DBKeyType::ShardUId] + ); + let trie_changes_option: Option = store.get_ser( + DBCol::TrieChanges, + &join_two_keys(&block_hash_key, &shard_uid_key), + )?; + + if let Some(trie_changes) = trie_changes_option { + for op in trie_changes.insertions() { + store.insert_state_to_cache_from_op(op, &shard_uid_key); + keys.push(op.hash().as_bytes().to_vec()); + } + } + } + keys + } _ => { vec![] } @@ -146,6 +183,10 @@ fn get_keys_from_store( Ok(key_type_to_keys) } +pub fn join_two_keys(prefix_key: &[u8], suffix_key: &[u8]) -> StoreKey { + [prefix_key, suffix_key].concat() +} + /// Returns all possible keys for a column with key represented by a specific sequence of key types. /// `key_type_to_value` -- result of `get_keys_from_store`, mapping from KeyType to all possible keys of that type. 
/// `key_types` -- description of a final key, what sequence of key types forms a key, result of `DBCol::key_type`. @@ -179,9 +220,7 @@ fn combine_keys_with_stop( let mut result_keys = vec![]; for prefix_key in &all_smaller_keys { for suffix_key in &key_type_to_keys[last_kt] { - let mut new_key = prefix_key.clone(); - new_key.extend(suffix_key); - result_keys.push(new_key); + result_keys.push(join_two_keys(prefix_key, suffix_key)); } } result_keys @@ -202,6 +241,7 @@ where impl StoreWithCache<'_> { pub fn get(&mut self, column: DBCol, key: &[u8]) -> io::Result { if !self.cache.contains_key(&(column, key.to_vec())) { + crate::metrics::COLD_MIGRATION_READS.with_label_values(&[<&str>::from(column)]).inc(); self.cache.insert( (column.clone(), key.to_vec()), self.store.get(column, key)?.map(|x| x.as_slice().to_vec()), @@ -232,6 +272,17 @@ impl StoreWithCache<'_> { ) -> io::Result { option_to_not_found(self.get_ser(column, key), format_args!("{:?}: {:?}", column, key)) } + + pub fn insert_state_to_cache_from_op(&mut self, op: &TrieRefcountChange, shard_uid_key: &[u8]) { + debug_assert_eq!( + DBCol::State.key_type(), + &[DBKeyType::ShardUId, DBKeyType::TrieNodeOrValueHash] + ); + self.cache.insert( + (DBCol::State, join_two_keys(shard_uid_key, op.hash().as_bytes())), + Some(op.payload().to_vec()), + ); + } } #[cfg(test)] diff --git a/core/store/src/columns.rs b/core/store/src/columns.rs index a9f6188acf2..a95c0cc2df0 100644 --- a/core/store/src/columns.rs +++ b/core/store/src/columns.rs @@ -368,7 +368,7 @@ impl DBCol { /// Whether this column should be copied to the cold storage. 
pub const fn is_cold(&self) -> bool { match self { - DBCol::Block => true, + DBCol::Block | DBCol::State => true, _ => false, } } diff --git a/core/store/src/metrics.rs b/core/store/src/metrics.rs index 68d3fb0e71e..a569701809b 100644 --- a/core/store/src/metrics.rs +++ b/core/store/src/metrics.rs @@ -205,3 +205,12 @@ pub static PREFETCH_STAGED_SLOTS: Lazy = Lazy::new(|| { ) .unwrap() }); +#[cfg(feature = "cold_store")] +pub static COLD_MIGRATION_READS: Lazy = Lazy::new(|| { + try_create_int_counter_vec( + "near_cold_migration_reads", + "Number of get calls to hot store made for every column during copying data to cold storage.", + &["col"], + ) + .unwrap() +}); diff --git a/core/store/src/trie/mod.rs b/core/store/src/trie/mod.rs index c316de9f6fc..44af67260f5 100644 --- a/core/store/src/trie/mod.rs +++ b/core/store/src/trie/mod.rs @@ -498,6 +498,16 @@ pub struct TrieRefcountChange { rc: std::num::NonZeroU32, } +impl TrieRefcountChange { + pub fn hash(&self) -> &CryptoHash { + &self.trie_node_or_value_hash + } + + pub fn payload(&self) -> &[u8] { + self.trie_node_or_value.as_slice() + } +} + /// /// TrieChanges stores delta for refcount. /// Multiple versions of the state work the following way: @@ -533,6 +543,10 @@ impl TrieChanges { pub fn empty(old_root: StateRoot) -> Self { TrieChanges { old_root, new_root: old_root, insertions: vec![], deletions: vec![] } } + + pub fn insertions(&self) -> &[TrieRefcountChange] { + self.insertions.as_slice() + } } /// Result of applying state part to Trie. 
diff --git a/integration-tests/src/tests/client/cold_storage.rs b/integration-tests/src/tests/client/cold_storage.rs index e6309bdaacf..5c56e937025 100644 --- a/integration-tests/src/tests/client/cold_storage.rs +++ b/integration-tests/src/tests/client/cold_storage.rs @@ -7,7 +7,7 @@ use near_o11y::testonly::init_test_logger; use near_primitives::transaction::{ Action, DeployContractAction, FunctionCallAction, SignedTransaction, }; -use near_store::cold_storage::{test_cold_genesis_update, update_cold_db}; +use near_store::cold_storage::{test_cold_genesis_update, test_get_store_reads, update_cold_db}; use near_store::db::TestDB; use near_store::{DBCol, NodeStorage, Store, Temperature}; use nearcore::config::GenesisExt; @@ -22,10 +22,13 @@ fn check_key(first_store: &Store, second_store: &Store, col: DBCol, key: &[u8]) assert_eq!(first_res.unwrap(), second_res.unwrap()); } -fn check_iter(first_store: &Store, second_store: &Store, col: DBCol) { +fn check_iter(first_store: &Store, second_store: &Store, col: DBCol) -> u64 { + let mut num_checks = 0; for (key, _) in first_store.iter(col).map(Result::unwrap) { check_key(first_store, second_store, col, &key); + num_checks += 1; } + num_checks } /// Deploying test contract and calling write_random_value 5 times every block for 4 epochs. 
@@ -56,6 +59,8 @@ fn test_storage_after_commit_of_cold_update() { test_cold_genesis_update(&*cold_db, &env.clients[0].runtime_adapter.store()).unwrap(); + let state_reads = test_get_store_reads(DBCol::State); + for h in 1..max_height { let signer = InMemorySigner::from_seed("test0".parse().unwrap(), KeyType::ED25519, "test0"); if h == 1 { @@ -102,16 +107,34 @@ fn test_storage_after_commit_of_cold_update() { let block = env.clients[0].produce_block(h).unwrap().unwrap(); env.process_block(0, block.clone(), Provenance::PRODUCED); - last_hash = block.hash().clone(); + update_cold_db( + &*cold_db, + &env.clients[0].runtime_adapter.store(), + &env.clients[0] + .runtime_adapter + .get_shard_layout( + &env.clients[0] + .runtime_adapter + .get_epoch_id_from_prev_block(&last_hash) + .unwrap(), + ) + .unwrap(), + &h, + ) + .unwrap(); - update_cold_db(&*cold_db, &env.clients[0].runtime_adapter.store(), &h).unwrap(); + last_hash = block.hash().clone(); } + // assert that we don't read State from db, but from TrieChanges + assert_eq!(state_reads, test_get_store_reads(DBCol::State)); + let cold_store = NodeStorage::new(cold_db).get_store(Temperature::Hot); for col in DBCol::iter() { if col.is_cold() { - check_iter(&env.clients[0].runtime_adapter.store(), &cold_store, col); + // assert that this test actually checks something + assert!(check_iter(&env.clients[0].runtime_adapter.store(), &cold_store, col) > 0); } } } From ec32bebef127d1ce196f55e3e7028d0d0573a079 Mon Sep 17 00:00:00 2001 From: mzhangmzz <34969888+mzhangmzz@users.noreply.github.com> Date: Wed, 26 Oct 2022 16:13:38 -0400 Subject: [PATCH 035/103] Change debug page to display error messages in block processing (#7930) This PR implements #7843 It also displays information of blocks that are dropped because of height is already processed Screen Shot 2022-10-25 at 11 40 58 PM --- chain/chain/src/blocks_delay_tracker.rs | 40 +++++++++++++-- chain/chain/src/chain.rs | 59 +++++++++++++++++------ 
chain/chain/src/tests/simple_chain.rs | 6 +-- chain/client/src/client.rs | 7 ++- chain/jsonrpc/res/chain_n_chunk_info.html | 7 ++- core/primitives/src/views.rs | 12 ++++- 6 files changed, 105 insertions(+), 26 deletions(-) diff --git a/chain/chain/src/blocks_delay_tracker.rs b/chain/chain/src/blocks_delay_tracker.rs index 441c8ed4a2f..6c0827ecada 100644 --- a/chain/chain/src/blocks_delay_tracker.rs +++ b/chain/chain/src/blocks_delay_tracker.rs @@ -9,7 +9,7 @@ use near_primitives::time::Clock; use near_primitives::types::{BlockHeight, ShardId}; use near_primitives::views::{ BlockProcessingInfo, BlockProcessingStatus, ChainProcessingInfo, ChunkProcessingInfo, - ChunkProcessingStatus, + ChunkProcessingStatus, DroppedReason, }; use std::collections::{BTreeMap, HashMap}; use std::mem; @@ -58,6 +58,10 @@ pub struct BlockTrackingStats { pub removed_from_missing_chunks_timestamp: Option, /// Timestamp when block was done processing pub processed_timestamp: Option, + /// Whether the block is not processed because of different reasons + pub dropped: Option, + /// Stores the error message encountered during the processing of this block + pub error: Option, /// Only contains new chunks that belong to this block, if the block doesn't produce a new chunk /// for a shard, the corresponding item will be None. 
pub chunks: Vec>, @@ -164,12 +168,30 @@ impl BlocksDelayTracker { removed_from_orphan_timestamp: None, removed_from_missing_chunks_timestamp: None, processed_timestamp: None, + dropped: None, + error: None, chunks, }); self.blocks_height_map.entry(height).or_insert(vec![]).push(*block_hash); } } + pub fn mark_block_dropped(&mut self, block_hash: &CryptoHash, reason: DroppedReason) { + if let Some(block_entry) = self.blocks.get_mut(block_hash) { + block_entry.dropped = Some(reason); + } else { + error!(target:"blocks_delay_tracker", "block {:?} was dropped but was not marked received", block_hash); + } + } + + pub fn mark_block_errored(&mut self, block_hash: &CryptoHash, err: String) { + if let Some(block_entry) = self.blocks.get_mut(block_hash) { + block_entry.error = Some(err); + } else { + error!(target:"blocks_delay_tracker", "block {:?} was errored but was not marked received", block_hash); + } + } + pub fn mark_block_orphaned(&mut self, block_hash: &CryptoHash, timestamp: Instant) { if let Some(block_entry) = self.blocks.get_mut(block_hash) { block_entry.orphaned_timestamp = Some(timestamp); @@ -350,7 +372,7 @@ impl BlocksDelayTracker { }) .collect(); let now = Clock::instant(); - let block_status = chain.get_block_status(block_hash); + let block_status = chain.get_block_status(block_hash, block_stats); let in_progress_ms = block_stats .processed_timestamp .unwrap_or(now) @@ -391,7 +413,11 @@ impl BlocksDelayTracker { } impl Chain { - fn get_block_status(&self, block_hash: &CryptoHash) -> BlockProcessingStatus { + fn get_block_status( + &self, + block_hash: &CryptoHash, + block_info: &BlockTrackingStats, + ) -> BlockProcessingStatus { if self.is_orphan(block_hash) { return BlockProcessingStatus::Orphan; } @@ -402,7 +428,13 @@ impl Chain { return BlockProcessingStatus::InProcessing; } if self.store().block_exists(block_hash).unwrap_or_default() { - return BlockProcessingStatus::Processed; + return BlockProcessingStatus::Accepted; + } + if let 
Some(dropped_reason) = &block_info.dropped { + return BlockProcessingStatus::Dropped(dropped_reason.clone()); + } + if let Some(error) = &block_info.error { + return BlockProcessingStatus::Error(error.clone()); } return BlockProcessingStatus::Unknown; } diff --git a/chain/chain/src/chain.rs b/chain/chain/src/chain.rs index b40646ea249..084696d5a43 100644 --- a/chain/chain/src/chain.rs +++ b/chain/chain/src/chain.rs @@ -46,9 +46,9 @@ use near_primitives::types::{ use near_primitives::unwrap_or_return; use near_primitives::utils::MaybeValidated; use near_primitives::views::{ - BlockStatusView, ExecutionOutcomeWithIdView, ExecutionStatusView, FinalExecutionOutcomeView, - FinalExecutionOutcomeWithReceiptView, FinalExecutionStatus, LightClientBlockView, - SignedTransactionView, + BlockStatusView, DroppedReason, ExecutionOutcomeWithIdView, ExecutionStatusView, + FinalExecutionOutcomeView, FinalExecutionOutcomeWithReceiptView, FinalExecutionStatus, + LightClientBlockView, SignedTransactionView, }; #[cfg(feature = "protocol_feature_flat_state")] use near_store::{flat_state, StorageError}; @@ -1624,14 +1624,10 @@ impl Chain { apply_chunks_done_callback: DoneApplyChunkCallback, ) -> Result<(), Error> { let block_received_time = Clock::instant(); - self.blocks_delay_tracker.mark_block_received( - block.get_inner(), - block_received_time, - Clock::utc(), - ); metrics::BLOCK_PROCESSING_ATTEMPTS_TOTAL.inc(); let block_height = block.header().height(); + let hash = *block.hash(); let res = self.start_process_block_impl( me, block, @@ -1641,6 +1637,10 @@ impl Chain { block_received_time, ); + if matches!(res, Err(Error::TooManyProcessingBlocks)) { + self.blocks_delay_tracker + .mark_block_dropped(&hash, DroppedReason::TooManyProcessingBlocks); + } // Save the block as processed even if it failed. This is used to filter out the // incoming blocks that are not requested on heights which we already processed. 
// If there is a new incoming block that we didn't request and we already have height @@ -2066,6 +2066,20 @@ impl Chain { } } + fn postprocess_block_only( + &mut self, + me: &Option, + block: &Block, + block_preprocess_info: BlockPreprocessInfo, + apply_results: Vec>, + ) -> Result, Error> { + let mut chain_update = self.chain_update(); + let new_head = + chain_update.postprocess_block(me, &block, block_preprocess_info, apply_results)?; + chain_update.commit()?; + Ok(new_head) + } + /// Run postprocessing on this block, which stores the block on chain. /// Check that if accepting the block unlocks any orphans in the orphan pool and start /// the processing of those blocks. @@ -2092,12 +2106,16 @@ impl Chain { .entered(); let prev_head = self.store.head()?; - let mut chain_update = self.chain_update(); let provenance = block_preprocess_info.provenance.clone(); let block_start_processing_time = block_preprocess_info.block_start_processing_time.clone(); let new_head = - chain_update.postprocess_block(me, &block, block_preprocess_info, apply_results)?; - chain_update.commit()?; + match self.postprocess_block_only(me, &block, block_preprocess_info, apply_results) { + Err(err) => { + self.blocks_delay_tracker.mark_block_errored(&block_hash, err.to_string()); + return Err(err); + } + Ok(new_head) => new_head, + }; // Update flat storage head to be the last final block. Note that this update happens // in a separate db transaction from the update from block processing. 
This is intentional @@ -3232,7 +3250,12 @@ impl Chain { let block = self.store.get_block(block_hash)?; let prev_block = self.store.get_block(block.header().prev_hash())?; let mut chain_update = self.chain_update(); - chain_update.apply_chunk_postprocessing(me, &block, &prev_block, results)?; + chain_update.apply_chunk_postprocessing( + me, + &block, + &prev_block, + results.into_iter().collect::, Error>>()?, + )?; chain_update.commit()?; Ok(()) } @@ -4577,13 +4600,13 @@ impl<'a> ChainUpdate<'a> { me: &Option, block: &Block, prev_block: &Block, - apply_results: Vec>, + apply_results: Vec, ) -> Result<(), Error> { let _span = tracing::debug_span!(target: "chain", "apply_chunk_postprocessing").entered(); for result in apply_results { self.process_apply_chunk_result( me, - result?, + result, *block.hash(), block.header().height(), *prev_block.hash(), @@ -4867,7 +4890,13 @@ impl<'a> ChainUpdate<'a> { ) -> Result, Error> { let prev_hash = block.header().prev_hash(); let prev_block = self.chain_store_update.get_block(prev_hash)?; - self.apply_chunk_postprocessing(me, block, &prev_block, apply_chunks_results)?; + let results = apply_chunks_results.into_iter().map(|x| { + if let Err(err) = &x { + warn!(target:"chain", hash = %block.hash(), error = %err, "Error in applying chunks for block"); + } + x + }).collect::, Error>>()?; + self.apply_chunk_postprocessing(me, block, &prev_block, results)?; let BlockPreprocessInfo { is_caught_up, diff --git a/chain/chain/src/tests/simple_chain.rs b/chain/chain/src/tests/simple_chain.rs index a0fbcc67b67..b2008c7b301 100644 --- a/chain/chain/src/tests/simple_chain.rs +++ b/chain/chain/src/tests/simple_chain.rs @@ -53,13 +53,11 @@ fn build_chain() { // - one time for validating block header mock_clock_guard.add_utc(chrono::Utc.ymd(2020, 10, 1).and_hms_milli(0, 0, 3, 444 + i)); mock_clock_guard.add_utc(chrono::Utc.ymd(2020, 10, 1).and_hms_milli(0, 0, 3, 444 + i)); - mock_clock_guard.add_utc(chrono::Utc.ymd(2020, 10, 1).and_hms_milli(0, 0, 
3, 444 + i)); // Instant calls for CryptoHashTimer. mock_clock_guard.add_instant(Instant::now()); mock_clock_guard.add_instant(Instant::now()); mock_clock_guard.add_instant(Instant::now()); mock_clock_guard.add_instant(Instant::now()); - mock_clock_guard.add_instant(Instant::now()); let prev_hash = *chain.head_header().unwrap().hash(); let prev = chain.get_block(&prev_hash).unwrap(); @@ -68,8 +66,8 @@ fn build_chain() { assert_eq!(chain.head().unwrap().height, i as u64); } - assert_eq!(mock_clock_guard.utc_call_count(), 14); - assert_eq!(mock_clock_guard.instant_call_count(), 21); + assert_eq!(mock_clock_guard.utc_call_count(), 10); + assert_eq!(mock_clock_guard.instant_call_count(), 17); assert_eq!(chain.head().unwrap().height, 4); let hash = chain.head().unwrap().last_block_hash; diff --git a/chain/client/src/client.rs b/chain/client/src/client.rs index cefc6a0077b..469250a3e1c 100644 --- a/chain/client/src/client.rs +++ b/chain/client/src/client.rs @@ -55,7 +55,7 @@ use near_primitives::block_header::ApprovalType; use near_primitives::epoch_manager::RngSeed; use near_primitives::network::PeerId; use near_primitives::version::PROTOCOL_VERSION; -use near_primitives::views::CatchupStatusView; +use near_primitives::views::{CatchupStatusView, DroppedReason}; const NUM_REBROADCAST_BLOCKS: usize = 30; @@ -856,6 +856,7 @@ impl Client { } else { debug!(target: "client", error = %err, "Process block: refused by chain"); } + self.chain.blocks_delay_tracker.mark_block_errored(&hash, err.to_string()); } } @@ -871,9 +872,13 @@ impl Client { was_requested: bool, apply_chunks_done_callback: DoneApplyChunkCallback, ) -> Result<(), near_chain::Error> { + self.chain.blocks_delay_tracker.mark_block_received(&block, Clock::instant(), Clock::utc()); // To protect ourselves from spamming, we do some pre-check on block height before we do any // real processing. if !self.check_block_height(&block, was_requested)? 
{ + self.chain + .blocks_delay_tracker + .mark_block_dropped(block.hash(), DroppedReason::HeightProcessed); return Ok(()); } let prev_hash = *block.header().prev_hash(); diff --git a/chain/jsonrpc/res/chain_n_chunk_info.html b/chain/jsonrpc/res/chain_n_chunk_info.html index 407ac883f86..80a3121f546 100644 --- a/chain/jsonrpc/res/chain_n_chunk_info.html +++ b/chain/jsonrpc/res/chain_n_chunk_info.html @@ -45,6 +45,11 @@ String(time.getUTCSeconds()).padStart(2, "0") + "." + String(time.getUTCMilliseconds()).padStart(3, '0') } + function printStatus(blockStatus) { + if (typeof blockStatus === "string") return blockStatus + return JSON.stringify(blockStatus) + } + function printDuration(start, end) { let duration = Date.parse(end) - Date.parse(start); if (duration > 0) { @@ -128,7 +133,7 @@ row.append($('').append(block.height)); row.append($('').append(block.hash)); row.append($('').append(prettyTime(block.received_timestamp))); - row.append($('').append(block.block_status)); + row.append($('').append(printStatus(block.block_status))); row.append($('').append(printTimeInMs(block.in_progress_ms))); row.append($('').append(printTimeInMs(block.orphaned_ms))); row.append($('').append(printTimeInMs(block.missing_chunks_ms))); diff --git a/core/primitives/src/views.rs b/core/primitives/src/views.rs index ec6f7865f00..b440a35d037 100644 --- a/core/primitives/src/views.rs +++ b/core/primitives/src/views.rs @@ -479,10 +479,20 @@ pub enum BlockProcessingStatus { Orphan, WaitingForChunks, InProcessing, - Processed, + Accepted, + Error(String), + Dropped(DroppedReason), Unknown, } +#[derive(BorshSerialize, BorshDeserialize, Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] +pub enum DroppedReason { + // If the node has already processed a block at this height + HeightProcessed, + // If the block processing pool is full + TooManyProcessingBlocks, +} + #[derive(Serialize, Deserialize, Debug)] pub struct ChunkProcessingInfo { pub height_created: BlockHeight, From 
480dfecf690db2bc6995adc2f0c75b27f11cea8b Mon Sep 17 00:00:00 2001 From: posvyatokum Date: Thu, 27 Oct 2022 11:32:39 +0100 Subject: [PATCH 036/103] store: adding StateChanges to cold columns (#7937) --- core/store/src/cold_storage.rs | 28 +++++++++++++++++++ core/store/src/columns.rs | 2 +- .../src/tests/client/cold_storage.rs | 3 ++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/core/store/src/cold_storage.rs b/core/store/src/cold_storage.rs index 52b341096a9..b022cab7c58 100644 --- a/core/store/src/cold_storage.rs +++ b/core/store/src/cold_storage.rs @@ -173,6 +173,20 @@ fn get_keys_from_store( } keys } + // TODO: write StateChanges values to colddb directly, not to cache. + DBKeyType::TrieKey => { + let mut keys = vec![]; + store.iter_prefix_with_callback( + DBCol::StateChanges, + &block_hash_key, + |full_key| { + let mut full_key = Vec::from(full_key); + full_key.drain(..block_hash_key.len()); + keys.push(full_key); + }, + )?; + keys + } _ => { vec![] } @@ -239,6 +253,20 @@ where #[allow(dead_code)] impl StoreWithCache<'_> { + pub fn iter_prefix_with_callback( + &mut self, + col: DBCol, + key_prefix: &[u8], + mut callback: impl FnMut(Box<[u8]>), + ) -> io::Result<()> { + for iter_result in self.store.iter_prefix(col, key_prefix) { + let (key, value) = iter_result?; + self.cache.insert((col, key.to_vec()), Some(value.into())); + callback(key); + } + Ok(()) + } + pub fn get(&mut self, column: DBCol, key: &[u8]) -> io::Result { if !self.cache.contains_key(&(column, key.to_vec())) { crate::metrics::COLD_MIGRATION_READS.with_label_values(&[<&str>::from(column)]).inc(); diff --git a/core/store/src/columns.rs b/core/store/src/columns.rs index a95c0cc2df0..7619fc057d8 100644 --- a/core/store/src/columns.rs +++ b/core/store/src/columns.rs @@ -368,7 +368,7 @@ impl DBCol { /// Whether this column should be copied to the cold storage. 
pub const fn is_cold(&self) -> bool { match self { - DBCol::Block | DBCol::State => true, + DBCol::Block | DBCol::State | DBCol::StateChanges => true, _ => false, } } diff --git a/integration-tests/src/tests/client/cold_storage.rs b/integration-tests/src/tests/client/cold_storage.rs index 5c56e937025..a42c2bbdde6 100644 --- a/integration-tests/src/tests/client/cold_storage.rs +++ b/integration-tests/src/tests/client/cold_storage.rs @@ -60,6 +60,7 @@ fn test_storage_after_commit_of_cold_update() { test_cold_genesis_update(&*cold_db, &env.clients[0].runtime_adapter.store()).unwrap(); let state_reads = test_get_store_reads(DBCol::State); + let state_changes_reads = test_get_store_reads(DBCol::StateChanges); for h in 1..max_height { let signer = InMemorySigner::from_seed("test0".parse().unwrap(), KeyType::ED25519, "test0"); @@ -128,6 +129,8 @@ fn test_storage_after_commit_of_cold_update() { // assert that we don't read State from db, but from TrieChanges assert_eq!(state_reads, test_get_store_reads(DBCol::State)); + // assert that we don't read StateChanges from db again after iter_prefix + assert_eq!(state_changes_reads, test_get_store_reads(DBCol::StateChanges)); let cold_store = NodeStorage::new(cold_db).get_store(Temperature::Hot); From e5c2661931e851676d54c5c018d1bcbf746c3578 Mon Sep 17 00:00:00 2001 From: mm-near <91919554+mm-near@users.noreply.github.com> Date: Thu, 27 Oct 2022 13:00:59 +0200 Subject: [PATCH 037/103] Changing peer selection process & show connection errors in debug page (#7910) Idea on how we could do a small change to peer selection process, by preferring to pick peers that we've used to connect to, in order to speed up connection process. And also updated network debug page to show the reason why the connection to peer has failed. 
--- chain/jsonrpc/res/network_info.html | 6 +- .../src/peer_manager/peer_manager_actor.rs | 62 +++++--- .../src/peer_manager/peer_store/mod.rs | 97 ++++++++++-- .../src/peer_manager/peer_store/tests.rs | 138 +++++++++++++++++- chain/network/src/store/schema/mod.rs | 1 + chain/network/src/types.rs | 10 ++ core/primitives/src/views.rs | 2 +- 7 files changed, 280 insertions(+), 36 deletions(-) diff --git a/chain/jsonrpc/res/network_info.html b/chain/jsonrpc/res/network_info.html index a367737f88b..f1a9e78a468 100644 --- a/chain/jsonrpc/res/network_info.html +++ b/chain/jsonrpc/res/network_info.html @@ -321,11 +321,13 @@ row.append($("").append(to_human_time(Math.floor(Date.now() / 1000) - element['first_seen']))); row.append($("").append(to_human_time(Math.floor(Date.now() / 1000) - element['last_seen']))); if (element['last_attempt'] != null) { - row.append($("").append(to_human_time(Math.floor(Date.now() / 1000) - element['last_attempt']))); + row.append($("").append(to_human_time(Math.floor(Date.now() / 1000) - element['last_attempt'][0]))); + row.append($("").append(element['status'] + " Last attempt: " + element['last_attempt'][1])); } else { row.append($("")); + row.append($("").append(element['status'])); } - row.append($("").append(element['status'])); + $(".tbody-detailed-peer-storage").append(row); }); $(".detailed-peer-storage-div").show(); diff --git a/chain/network/src/peer_manager/peer_manager_actor.rs b/chain/network/src/peer_manager/peer_manager_actor.rs index e3d2d9901ae..3d597a9aeb1 100644 --- a/chain/network/src/peer_manager/peer_manager_actor.rs +++ b/chain/network/src/peer_manager/peer_manager_actor.rs @@ -39,6 +39,7 @@ use near_primitives::types::AccountId; use near_primitives::views::{KnownPeerStateView, PeerStoreView}; use rand::seq::IteratorRandom; use rand::thread_rng; +use rand::Rng; use std::cmp::min; use std::collections::{HashMap, HashSet}; use std::net::SocketAddr; @@ -87,6 +88,11 @@ const UNRELIABLE_PEER_HORIZON: u64 = 60; /// Due to 
implementation limits of `Graph` in `near-network`, we support up to 128 client. pub const MAX_NUM_PEERS: usize = 128; +/// When picking a peer to connect to, we'll pick from the 'safer peers' +/// (a.k.a. ones that we've been connected to in the past) with these odds. +/// Otherwise, we'd pick any peer that we've heard about. +const PREFER_PREVIOUSLY_CONNECTED_PEER: f64 = 0.6; + #[derive(Clone, PartialEq, Eq)] struct WhitelistNode { id: PeerId, @@ -132,10 +138,8 @@ pub struct PeerManagerActor { /// reached. whitelist_nodes: Vec, + /// State that is shared between multiple threads (including PeerActors). pub(crate) state: Arc, - - /// Last time when we tried to establish connection to this peer. - last_peer_outbound_attempt: HashMap, } /// TEST-ONLY @@ -282,7 +286,6 @@ impl PeerManagerActor { started_connect_attempts: false, local_peer_pending_update_nonce_request: HashMap::new(), whitelist_nodes, - last_peer_outbound_attempt: Default::default(), state: Arc::new(NetworkState::new( &clock, store.clone(), @@ -773,32 +776,45 @@ impl PeerManagerActor { metrics::PEER_MANAGER_TRIGGER_TIME.with_label_values(&["monitor_peers"]).start_timer(); self.state.peer_store.unban(&self.clock); + if let Err(err) = self.state.peer_store.update_connected_peers_last_seen(&self.clock) { + error!(target: "network", ?err, "Failed to update peers last seen time."); + } if self.is_outbound_bootstrap_needed() { let tier2 = self.state.tier2.load(); - if let Some(peer_info) = self.state.peer_store.unconnected_peer(|peer_state| { - // Ignore connecting to ourself - self.my_peer_id == peer_state.peer_info.id + // With some odds - try picking one of the 'NotConnected' peers -- these are the ones that we were able to connect to in the past. 
+ let prefer_previously_connected_peer = + thread_rng().gen_bool(PREFER_PREVIOUSLY_CONNECTED_PEER); + if let Some(peer_info) = self.state.peer_store.unconnected_peer( + |peer_state| { + // Ignore connecting to ourself + self.my_peer_id == peer_state.peer_info.id || self.config.node_addr == peer_state.peer_info.addr // Or to peers we are currently trying to connect to || tier2.outbound_handshakes.contains(&peer_state.peer_info.id) - }) { + }, + prefer_previously_connected_peer, + ) { // Start monitor_peers_attempts from start after we discover the first healthy peer if !self.started_connect_attempts { self.started_connect_attempts = true; interval = default_interval; } - self.last_peer_outbound_attempt.insert(peer_info.id.clone(), self.clock.now_utc()); ctx.spawn(wrap_future({ let state = self.state.clone(); let clock = self.clock.clone(); async move { - if let Err(err) = async { + let result = async { let stream = tcp::Stream::connect(&peer_info).await.context("tcp::Stream::connect()")?; - PeerActor::spawn(clock,stream,None,state.clone()).context("PeerActor::spawn()")?; + PeerActor::spawn(clock.clone(),stream,None,state.clone()).context("PeerActor::spawn()")?; anyhow::Ok(()) - }.await { - tracing::info!(target:"network", ?err, "failed to connect to {peer_info}"); + }.await; + + if result.is_err() { + tracing::info!(target:"network", ?result, "failed to connect to {peer_info}"); + } + if state.peer_store.peer_connection_attempt(&clock, &peer_info.id, result).is_err() { + error!(target: "network", ?peer_info, "Failed to mark peer as failed."); } } })); @@ -1464,14 +1480,24 @@ impl Handler for PeerManagerActor { addr: format!("{:?}", known_peer_state.peer_info.addr), first_seen: known_peer_state.first_seen.unix_timestamp(), last_seen: known_peer_state.last_seen.unix_timestamp(), - last_attempt: self - .last_peer_outbound_attempt - .get(peer_id) - .map(|it| it.unix_timestamp()), + last_attempt: known_peer_state.last_outbound_attempt.clone().map( + |(attempt_time, 
attempt_result)| { + let foo = match attempt_result { + Ok(_) => String::from("Ok"), + Err(err) => format!("Error: {:?}", err.as_str()), + }; + (attempt_time.unix_timestamp(), foo) + }, + ), }) .collect::>(); - peer_states_view.sort_by_key(|a| (-a.last_attempt.unwrap_or(0), -a.last_seen)); + peer_states_view.sort_by_key(|a| { + ( + -a.last_attempt.clone().map(|(attempt_time, _)| attempt_time).unwrap_or(0), + -a.last_seen, + ) + }); DebugStatus::PeerStore(PeerStoreView { peer_states: peer_states_view }) } } diff --git a/chain/network/src/peer_manager/peer_store/mod.rs b/chain/network/src/peer_manager/peer_store/mod.rs index fc12e5e30ff..ea536310b6a 100644 --- a/chain/network/src/peer_manager/peer_store/mod.rs +++ b/chain/network/src/peer_manager/peer_store/mod.rs @@ -267,16 +267,25 @@ impl PeerStore { let mut peers_to_keep = vec![]; let mut peers_to_delete = vec![]; for (peer_id, peer_state) in store.list_peer_states()? { - // If it’s already banned, keep it banned. Otherwise, it’s not connected. - let status = if peer_state.status.is_banned() { - if config.connect_only_to_boot_nodes && boot_nodes.contains(&peer_id) { - // Give boot node another chance. - KnownPeerStatus::NotConnected - } else { - peer_state.status + let status = match peer_state.status { + KnownPeerStatus::Unknown => { + // We mark boot nodes as 'NotConnected', as we trust that they exist. + if boot_nodes.contains(&peer_id) { + KnownPeerStatus::NotConnected + } else { + KnownPeerStatus::Unknown + } + } + KnownPeerStatus::NotConnected => KnownPeerStatus::NotConnected, + KnownPeerStatus::Connected => KnownPeerStatus::NotConnected, + KnownPeerStatus::Banned(reason, deadline) => { + if config.connect_only_to_boot_nodes && boot_nodes.contains(&peer_id) { + // Give boot node another chance. 
+ KnownPeerStatus::NotConnected + } else { + KnownPeerStatus::Banned(reason, deadline) + } } - } else { - KnownPeerStatus::NotConnected }; let peer_state = KnownPeerState { @@ -284,6 +293,7 @@ impl PeerStore { first_seen: peer_state.first_seen, last_seen: peer_state.last_seen, status, + last_outbound_attempt: None, }; let is_blacklisted = @@ -347,6 +357,12 @@ impl PeerStore { self.0.lock().peer_states.values().filter(|st| st.status.is_banned()).count() } + #[allow(dead_code)] + /// Returns the state of the current peer in memory. + pub(crate) fn get_peer_state(&self, peer_id: &PeerId) -> Option { + self.0.lock().peer_states.get(peer_id).cloned() + } + pub(crate) fn peer_connected( &self, clock: &time::Clock, @@ -361,6 +377,24 @@ impl PeerStore { Ok(store.set_peer_state(&peer_info.id, entry)?) } + /// Update the 'last_seen' time for all the peers that we're currently connected to. + pub(crate) fn update_connected_peers_last_seen( + &self, + clock: &time::Clock, + ) -> anyhow::Result<()> { + let mut inner = self.0.lock(); + let mut store = inner.store.clone(); + for (peer_id, peer_state) in inner.peer_states.iter_mut() { + if peer_state.status == KnownPeerStatus::Connected + && clock.now_utc() > peer_state.last_seen.saturating_add(time::Duration::minutes(1)) + { + peer_state.last_seen = clock.now_utc(); + store.set_peer_state(peer_id, peer_state)? + } + } + Ok(()) + } + pub(crate) fn peer_disconnected( &self, clock: &time::Clock, @@ -378,6 +412,31 @@ impl PeerStore { Ok(()) } + /// Records the last attempt to connect to peer. + /// Marks the peer as Unknown (as we failed to connect to it). 
+ pub(crate) fn peer_connection_attempt( + &self, + clock: &time::Clock, + peer_id: &PeerId, + result: Result<(), anyhow::Error>, + ) -> anyhow::Result<()> { + let mut inner = self.0.lock(); + let mut store = inner.store.clone(); + + if let Some(peer_state) = inner.peer_states.get_mut(peer_id) { + if result.is_err() { + peer_state.status = KnownPeerStatus::Unknown; + } + peer_state.last_outbound_attempt = + Some((clock.now_utc(), result.map_err(|err| err.to_string()))); + peer_state.last_seen = clock.now_utc(); + store.set_peer_state(peer_id, peer_state)?; + } else { + bail!("Peer {} is missing in the peer store", peer_id); + } + Ok(()) + } + pub(crate) fn peer_ban( &self, clock: &time::Clock, @@ -403,8 +462,28 @@ impl PeerStore { pub(crate) fn unconnected_peer( &self, ignore_fn: impl Fn(&KnownPeerState) -> bool, + prefer_previously_connected_peer: bool, ) -> Option { let inner = self.0.lock(); + if prefer_previously_connected_peer { + let preferred_peer = inner.find_peers( + |p| { + (p.status == KnownPeerStatus::NotConnected) + && !ignore_fn(p) + && p.peer_info.addr.is_some() + // if we're connecting only to the boot nodes - filter out the nodes that are not bootnodes. + && (!inner.config.connect_only_to_boot_nodes || inner.boot_nodes.contains(&p.peer_info.id)) + }, + 1, + ) + .get(0) + .cloned(); + // If we found a preferred peer - return it. + if preferred_peer.is_some() { + return preferred_peer; + }; + // otherwise, pick a peer from the wider pool below. 
+ } inner.find_peers( |p| { (p.status == KnownPeerStatus::NotConnected || p.status == KnownPeerStatus::Unknown) diff --git a/chain/network/src/peer_manager/peer_store/tests.rs b/chain/network/src/peer_manager/peer_store/tests.rs index 45101e6039a..3831e9adf32 100644 --- a/chain/network/src/peer_manager/peer_store/tests.rs +++ b/chain/network/src/peer_manager/peer_store/tests.rs @@ -2,7 +2,7 @@ use super::*; use crate::blacklist::Blacklist; use crate::time; use near_crypto::{KeyType, SecretKey}; -use near_store::{NodeStorage, StoreOpener}; +use near_store::{Mode, NodeStorage, StoreOpener}; use std::collections::HashSet; use std::net::{Ipv4Addr, SocketAddrV4}; @@ -85,8 +85,134 @@ fn test_unconnected_peer() { store, ) .unwrap(); - assert!(peer_store.unconnected_peer(|_| false).is_some()); - assert!(peer_store.unconnected_peer(|_| true).is_none()); + assert!(peer_store.unconnected_peer(|_| false, false).is_some()); + assert!(peer_store.unconnected_peer(|_| true, false).is_none()); + } +} + +#[test] +fn test_unknown_vs_not_connected() { + use KnownPeerStatus::{Connected, NotConnected, Unknown}; + let clock = time::FakeClock::default(); + let peer_info_a = gen_peer_info(0); + let peer_info_b = gen_peer_info(1); + let (_tmp_dir, opener) = NodeStorage::test_opener(); + let peer_info_boot_node = gen_peer_info(2); + let boot_nodes = vec![peer_info_boot_node.clone()]; + + let nodes = [&peer_info_a, &peer_info_b, &peer_info_boot_node]; + + let get_in_memory_status = |peer_store: &PeerStore| { + nodes.map(|peer| peer_store.get_peer_state(&peer.id).map(|known_state| known_state.status)) + }; + + let get_database_status = || { + let store = crate::store::Store::from(opener.open_in_mode(Mode::ReadOnly).unwrap()); + let peers_state: HashMap = + store.list_peer_states().unwrap().into_iter().map(|x| (x.0, x.1)).collect(); + nodes.map(|peer| peers_state.get(&peer.id).map(|known_state| known_state.status.clone())) + }; + + { + let store = store::Store::from(opener.open().unwrap()); + 
let peer_store = PeerStore::new( + &clock.clock(), + make_config(&boot_nodes, Blacklist::default(), false), + store, + ) + .unwrap(); + + // Check the status of the in-memory store. + // Boot node should be marked as not-connected, as we've verified it. + // TODO(mm-near) - the boot node should have been added as 'NotConnected' and not Unknown. + assert_eq!(get_in_memory_status(&peer_store), [None, None, Some(Unknown)]); + + // Add the remaining peers. + peer_store.add_direct_peer(&clock.clock(), peer_info_a.clone()).unwrap(); + peer_store.add_direct_peer(&clock.clock(), peer_info_b.clone()).unwrap(); + + // Check the state in a database. + // Seems that boot node is not added to the database when 'new' is called. + assert_eq!(get_database_status(), [Some(Unknown), Some(Unknown), None]); + + assert_eq!( + get_in_memory_status(&peer_store), + [Some(Unknown), Some(Unknown), Some(Unknown)] + ); + + // Connect to both nodes + for peer_info in [peer_info_a.clone(), peer_info_b.clone()] { + peer_store.peer_connected(&clock.clock(), &peer_info).unwrap(); + } + assert_eq!( + get_in_memory_status(&peer_store), + [Some(Connected), Some(Connected), Some(Unknown)] + ); + assert_eq!(get_database_status(), [Some(Connected), Some(Connected), None]); + + // Disconnect from 'b' + peer_store.peer_disconnected(&clock.clock(), &peer_info_b.id).unwrap(); + + assert_eq!( + get_in_memory_status(&peer_store), + [Some(Connected), Some(NotConnected), Some(Unknown)] + ); + assert_eq!(get_database_status(), [Some(Connected), Some(NotConnected), None]); + + // if we prefer 'previously connected' peers - we should keep picking 'b'. + assert_eq!( + (0..10) + .map(|_| peer_store.unconnected_peer(|_| false, true).unwrap().id) + .collect::>(), + [peer_info_b.id.clone()].into_iter().collect() + ); + + // if we don't care, we should pick either 'b' or 'boot'. 
+ assert_eq!( + (0..100) + .map(|_| peer_store.unconnected_peer(|_| false, false).unwrap().id) + .collect::>(), + [peer_info_b.id.clone(), peer_info_boot_node.id.clone()].into_iter().collect() + ); + + // And fail when trying to reconnect to b. + peer_store + .peer_connection_attempt( + &clock.clock(), + &peer_info_b.id, + Err(anyhow::anyhow!("b failed to connect error")), + ) + .unwrap(); + + // It should move 'back' into Unknown state. + assert_eq!( + get_in_memory_status(&peer_store), + [Some(Connected), Some(Unknown), Some(Unknown)] + ); + assert_eq!(get_database_status(), [Some(Connected), Some(Unknown), None]); + } + + { + // Let's reset the store. + let store = store::Store::from(opener.open().unwrap()); + let peer_store = PeerStore::new( + &clock.clock(), + make_config(&boot_nodes, Blacklist::default(), false), + store, + ) + .unwrap(); + assert_eq!( + get_in_memory_status(&peer_store), + [Some(NotConnected), Some(Unknown), Some(Unknown)] + ); + assert_eq!(get_database_status(), [Some(Connected), Some(Unknown), None]); + // After restart - we should try to connect to 'a' (if we prefer previously connected nodes). + assert_eq!( + (0..10) + .map(|_| peer_store.unconnected_peer(|_| false, true).unwrap().id) + .collect::>(), + [peer_info_a.id.clone()].into_iter().collect() + ); } } @@ -110,7 +236,7 @@ fn test_unconnected_peer_only_boot_nodes() { .unwrap(); peer_store.add_direct_peer(&clock.clock(), peer_in_store.clone()).unwrap(); peer_store.peer_connected(&clock.clock(), &peer_info_a).unwrap(); - assert_eq!(peer_store.unconnected_peer(|_| false), Some(peer_in_store.clone())); + assert_eq!(peer_store.unconnected_peer(|_| false, false), Some(peer_in_store.clone())); } // 1 boot node (peer_info_a) that we're already connected to. 
@@ -126,7 +252,7 @@ fn test_unconnected_peer_only_boot_nodes() { .unwrap(); peer_store.add_direct_peer(&clock.clock(), peer_in_store.clone()).unwrap(); peer_store.peer_connected(&clock.clock(), &peer_info_a).unwrap(); - assert_eq!(peer_store.unconnected_peer(|_| false), None); + assert_eq!(peer_store.unconnected_peer(|_| false, false), None); } // 1 boot node (peer_info_a) is in the store. @@ -140,7 +266,7 @@ fn test_unconnected_peer_only_boot_nodes() { ) .unwrap(); peer_store.add_direct_peer(&clock.clock(), peer_info_a.clone()).unwrap(); - assert_eq!(peer_store.unconnected_peer(|_| false), Some(peer_info_a.clone())); + assert_eq!(peer_store.unconnected_peer(|_| false, false), Some(peer_info_a.clone())); } } diff --git a/chain/network/src/store/schema/mod.rs b/chain/network/src/store/schema/mod.rs index 44cd72398ee..e014d33d9be 100644 --- a/chain/network/src/store/schema/mod.rs +++ b/chain/network/src/store/schema/mod.rs @@ -96,6 +96,7 @@ impl BorshRepr for KnownPeerStateRepr { .map_err(invalid_data)?, last_seen: time::Utc::from_unix_timestamp_nanos(s.last_seen as i128) .map_err(invalid_data)?, + last_outbound_attempt: None, }) } } diff --git a/chain/network/src/types.rs b/chain/network/src/types.rs index d92ed26091d..52104b37467 100644 --- a/chain/network/src/types.rs +++ b/chain/network/src/types.rs @@ -82,9 +82,15 @@ pub struct Ban { /// Status of the known peers. #[derive(Eq, PartialEq, Debug, Clone)] pub enum KnownPeerStatus { + /// We got information about this peer from someone, but we didn't + /// verify them yet. This peer might not exist, invalid IP etc. + /// Also the peers that we failed to connect to, will be marked as 'Unknown'. Unknown, + /// We know that this peer exists - we were connected to it, or it was provided as boot node. NotConnected, + /// We're currently connected to this peer. Connected, + /// We banned this peer for some reason. Once the ban time is over, it will move to 'NotConnected' state. 
Banned(ReasonForBan, time::Utc), } @@ -95,6 +101,9 @@ pub struct KnownPeerState { pub status: KnownPeerStatus, pub first_seen: time::Utc, pub last_seen: time::Utc, + // Last time we tried to connect to this peer. + // This data is not persisted in storage. + pub last_outbound_attempt: Option<(time::Utc, Result<(), String>)>, } impl KnownPeerState { @@ -104,6 +113,7 @@ impl KnownPeerState { status: KnownPeerStatus::Unknown, first_seen: now, last_seen: now, + last_outbound_attempt: None, } } } diff --git a/core/primitives/src/views.rs b/core/primitives/src/views.rs index b440a35d037..53c52091875 100644 --- a/core/primitives/src/views.rs +++ b/core/primitives/src/views.rs @@ -252,7 +252,7 @@ pub struct KnownPeerStateView { pub addr: String, pub first_seen: i64, pub last_seen: i64, - pub last_attempt: Option, + pub last_attempt: Option<(i64, String)>, } #[cfg_attr(feature = "deepsize_feature", derive(deepsize::DeepSizeOf))] From 5a80195a9278f720626ff2de3129fd60cb95f915 Mon Sep 17 00:00:00 2001 From: pompon0 Date: Thu, 27 Oct 2022 13:26:56 +0200 Subject: [PATCH 038/103] Split peer_manager tests into multiple files. (#7941) I've grouped tests by feature they are testing and put them into separate files. I've also moved network_state.rs to network_state/mod.rs, because I plan to split it by features as well (in particular the TIER1-related methods will be in a separate file). The main benefit is that smaller files are easier to handle from the version control pov (diffs, merges, etc.). 
--- .../mod.rs} | 0 chain/network/src/peer_manager/tests.rs | 742 ------------------ .../src/peer_manager/tests/accounts_data.rs | 264 +++++++ .../src/peer_manager/tests/connection_pool.rs | 121 +++ chain/network/src/peer_manager/tests/mod.rs | 4 + chain/network/src/peer_manager/tests/nonce.rs | 77 ++ .../network/src/peer_manager/tests/routing.rs | 309 ++++++++ 7 files changed, 775 insertions(+), 742 deletions(-) rename chain/network/src/peer_manager/{network_state.rs => network_state/mod.rs} (100%) delete mode 100644 chain/network/src/peer_manager/tests.rs create mode 100644 chain/network/src/peer_manager/tests/accounts_data.rs create mode 100644 chain/network/src/peer_manager/tests/connection_pool.rs create mode 100644 chain/network/src/peer_manager/tests/mod.rs create mode 100644 chain/network/src/peer_manager/tests/nonce.rs create mode 100644 chain/network/src/peer_manager/tests/routing.rs diff --git a/chain/network/src/peer_manager/network_state.rs b/chain/network/src/peer_manager/network_state/mod.rs similarity index 100% rename from chain/network/src/peer_manager/network_state.rs rename to chain/network/src/peer_manager/network_state/mod.rs diff --git a/chain/network/src/peer_manager/tests.rs b/chain/network/src/peer_manager/tests.rs deleted file mode 100644 index 3d1ac76e966..00000000000 --- a/chain/network/src/peer_manager/tests.rs +++ /dev/null @@ -1,742 +0,0 @@ -use crate::concurrency::demux; -use crate::config; -use crate::network_protocol::testonly as data; -use crate::network_protocol::{ - Edge, Encoding, Handshake, PartialEdgeInfo, PeerAddr, SyncAccountsData, -}; -use crate::network_protocol::{Ping, RoutedMessageBody, EDGE_MIN_TIMESTAMP_NONCE}; -use crate::peer; -use crate::peer::peer_actor::ClosingReason; -use crate::peer_manager; -use crate::peer_manager::connection; -use crate::peer_manager::network_state::LIMIT_PENDING_PEERS; -use crate::peer_manager::peer_manager_actor::Event as PME; -use crate::peer_manager::testonly::{Event, 
NormalAccountData}; -use crate::private_actix::RegisterPeerError; -use crate::tcp; -use crate::testonly::stream::Stream; -use crate::testonly::{make_rng, AsSet as _}; -use crate::time; -use crate::types::{PeerMessage, RoutingTableUpdate}; -use itertools::Itertools; -use near_o11y::testonly::init_test_logger; -use near_primitives::version::PROTOCOL_VERSION; -use near_store::db::TestDB; -use peer_manager::testonly::start as start_pm; -use pretty_assertions::assert_eq; -use rand::seq::SliceRandom as _; -use rand::Rng as _; -use std::collections::HashSet; -use std::sync::Arc; - -// After the initial exchange, all subsequent SyncRoutingTable messages are -// expected to contain only the diff of the known data. -#[tokio::test] -async fn repeated_data_in_sync_routing_table() { - init_test_logger(); - let mut rng = make_rng(921853233); - let rng = &mut rng; - let mut clock = time::FakeClock::default(); - let chain = Arc::new(data::Chain::make(&mut clock, rng, 10)); - let pm = peer_manager::testonly::start( - clock.clock(), - near_store::db::TestDB::new(), - chain.make_config(rng), - chain.clone(), - ) - .await; - let cfg = peer::testonly::PeerConfig { - network: chain.make_config(rng), - chain, - peers: vec![], - force_encoding: Some(Encoding::Proto), - nonce: None, - }; - let stream = tcp::Stream::connect(&pm.peer_info()).await.unwrap(); - let mut peer = peer::testonly::PeerHandle::start_endpoint(clock.clock(), cfg, stream).await; - let edge = peer.complete_handshake().await; - - let mut edges_got = HashSet::new(); - let mut edges_want = HashSet::new(); - let mut accounts_got = HashSet::new(); - let mut accounts_want = HashSet::new(); - edges_want.insert(edge); - - // Gradually increment the amount of data in the system and then broadcast it. - for _ in 0..10 { - // Wait for the new data to be broadcasted. - // Note that in the first iteration we expect just 1 edge, without sending anything before. 
- // It is important because the first SyncRoutingTable contains snapshot of all data known to - // the node (not just the diff), so we expect incremental behavior only after the first - // SyncRoutingTable. - // TODO(gprusak): the first SyncRoutingTable will be delayed, until we replace actix - // internal clock with a fake clock. - while edges_got != edges_want || accounts_got != accounts_want { - match peer.events.recv().await { - peer::testonly::Event::Network(PME::MessageProcessed( - PeerMessage::SyncRoutingTable(got), - )) => { - for a in got.accounts { - assert!(!accounts_got.contains(&a), "repeated broadcast: {a:?}"); - assert!(accounts_want.contains(&a), "unexpected broadcast: {a:?}"); - accounts_got.insert(a); - } - for e in got.edges { - assert!(!edges_got.contains(&e), "repeated broadcast: {e:?}"); - assert!(edges_want.contains(&e), "unexpected broadcast: {e:?}"); - edges_got.insert(e); - } - } - // Ignore other messages. - _ => {} - } - } - // Add more data. - let signer = data::make_signer(rng); - edges_want.insert(data::make_edge(&peer.cfg.signer(), &signer)); - accounts_want.insert(data::make_announce_account(rng)); - // Send all the data created so far. PeerManager is expected to discard the duplicates. - peer.send(PeerMessage::SyncRoutingTable(RoutingTableUpdate { - edges: edges_want.iter().cloned().collect(), - accounts: accounts_want.iter().cloned().collect(), - })) - .await; - } -} - -/// Awaits for SyncRoutingTable messages until all edges from `want` arrive. -/// Panics if any other edges arrive. -async fn wait_for_edges(peer: &mut peer::testonly::PeerHandle, want: &HashSet) { - let mut got = HashSet::new(); - while &got != want { - match peer.events.recv().await { - peer::testonly::Event::Network(PME::MessageProcessed( - PeerMessage::SyncRoutingTable(msg), - )) => { - got.extend(msg.edges); - assert!(want.is_superset(&got)); - } - // Ignore other messages. 
- _ => {} - } - } -} - -// After each handshake a full sync of routing table is performed with the peer. -// After a restart, all the edges reside in storage. The node shouldn't broadcast -// edges which it learned about before the restart. -// This test takes ~6s because of delays enforced in the PeerManager. -#[tokio::test] -async fn no_edge_broadcast_after_restart() { - init_test_logger(); - let mut rng = make_rng(921853233); - let rng = &mut rng; - let mut clock = time::FakeClock::default(); - let chain = Arc::new(data::Chain::make(&mut clock, rng, 10)); - - let mut total_edges = HashSet::new(); - let store = near_store::db::TestDB::new(); - - for i in 0..3 { - println!("iteration {i}"); - // Start a PeerManager and connect a peer to it. - let pm = peer_manager::testonly::start( - clock.clock(), - store.clone(), - chain.make_config(rng), - chain.clone(), - ) - .await; - let cfg = peer::testonly::PeerConfig { - network: chain.make_config(rng), - chain: chain.clone(), - peers: vec![], - force_encoding: Some(Encoding::Proto), - nonce: None, - }; - let stream = tcp::Stream::connect(&pm.peer_info()).await.unwrap(); - let mut peer = peer::testonly::PeerHandle::start_endpoint(clock.clock(), cfg, stream).await; - let edge = peer.complete_handshake().await; - - // Receive the initial sync, which will consist just of the current edge: - // - the disconnected edges from the previous iterations are not loaded yet. - // - the local edges weren't stored at all. - tracing::info!(target: "test", "wait_for_edges()"); - wait_for_edges(&mut peer, &[edge.clone()].into()).await; - - // Create a bunch of fresh unreachable edges, then send all the edges created so far. 
- let fresh_edges: HashSet<_> = [ - data::make_edge(&data::make_signer(rng), &data::make_signer(rng)), - data::make_edge(&data::make_signer(rng), &data::make_signer(rng)), - data::make_edge_tombstone(&data::make_signer(rng), &data::make_signer(rng)), - ] - .into(); - total_edges.extend(fresh_edges.clone()); - // We capture the events starting here to record all the edge prunnings after the - // SyncRoutingTable below is processed. - let mut events = pm.events.from_now(); - peer.send(PeerMessage::SyncRoutingTable(RoutingTableUpdate { - edges: total_edges.iter().cloned().collect::>(), - accounts: vec![], - })) - .await; - - // Wait for the fresh edges to be broadcasted back. - tracing::info!(target: "test", "wait_for_edges()"); - wait_for_edges(&mut peer, &fresh_edges).await; - - // Wait for all the disconnected edges created so far to be saved to storage. - tracing::info!(target: "test", "wait for pruning"); - let mut pruned = HashSet::new(); - while pruned != total_edges { - match events.recv().await { - Event::PeerManager(PME::RoutingTableUpdate { pruned_edges, .. }) => { - pruned.extend(pruned_edges) - } - _ => {} - } - } - } -} - -// Nonces must be odd (as even ones are reserved for tombstones). -fn to_active_nonce(timestamp: time::Utc) -> u64 { - let value = timestamp.unix_timestamp() as u64; - if value % 2 == 0 { - value + 1 - } else { - value - } -} - -// Test connecting to peer manager with timestamp-like nonces. -#[tokio::test] -async fn test_nonces() { - init_test_logger(); - let mut rng = make_rng(921853233); - let rng = &mut rng; - let mut clock = time::FakeClock::new(*EDGE_MIN_TIMESTAMP_NONCE + time::Duration::days(2)); - let chain = Arc::new(data::Chain::make(&mut clock, rng, 10)); - - // Start a PeerManager and connect a peer to it. 
- let pm = peer_manager::testonly::start( - clock.clock(), - near_store::db::TestDB::new(), - chain.make_config(rng), - chain.clone(), - ) - .await; - - let test_cases = [ - // Try to connect with peer with a valid nonce (current timestamp). - (Some(to_active_nonce(clock.now_utc())), true, "current timestamp"), - // Now try the peer with invalid timestamp (in the past) - (Some(to_active_nonce(clock.now_utc() - time::Duration::days(1))), false, "past timestamp"), - // Now try the peer with invalid timestamp (in the future) - ( - Some(to_active_nonce(clock.now_utc() + time::Duration::days(1))), - false, - "future timestamp", - ), - (Some(u64::MAX), false, "u64 max"), - (Some(i64::MAX as u64), false, "i64 max"), - (Some((i64::MAX - 1) as u64), false, "i64 max - 1"), - (Some(253402300799), false, "Max time"), - (Some(253402300799 + 2), false, "Over max time"), - //(Some(0), false, "Nonce 0"), - (None, true, "Nonce 1"), - ]; - - for test in test_cases { - println!("Running test {:?}", test.2); - let cfg = peer::testonly::PeerConfig { - network: chain.make_config(rng), - chain: chain.clone(), - peers: vec![], - force_encoding: Some(Encoding::Proto), - // Connect with nonce equal to unix timestamp - nonce: test.0, - }; - let stream = tcp::Stream::connect(&pm.peer_info()).await.unwrap(); - let mut peer = peer::testonly::PeerHandle::start_endpoint(clock.clock(), cfg, stream).await; - if test.1 { - peer.complete_handshake().await; - } else { - peer.fail_handshake().await; - } - } -} - -// test that TTL is handled property. 
-#[tokio::test] -async fn ttl() { - init_test_logger(); - let mut rng = make_rng(921853233); - let rng = &mut rng; - let mut clock = time::FakeClock::default(); - let chain = Arc::new(data::Chain::make(&mut clock, rng, 10)); - let mut pm = peer_manager::testonly::start( - clock.clock(), - near_store::db::TestDB::new(), - chain.make_config(rng), - chain.clone(), - ) - .await; - let cfg = peer::testonly::PeerConfig { - network: chain.make_config(rng), - chain, - peers: vec![], - force_encoding: Some(Encoding::Proto), - nonce: None, - }; - let stream = tcp::Stream::connect(&pm.peer_info()).await.unwrap(); - let mut peer = peer::testonly::PeerHandle::start_endpoint(clock.clock(), cfg, stream).await; - peer.complete_handshake().await; - // await for peer manager to compute the routing table. - // TODO(gprusak): probably extract it to a separate function when migrating other tests from - // integration-tests to near_network. - pm.events - .recv_until(|ev| match ev { - Event::PeerManager(PME::RoutingTableUpdate { next_hops, .. }) => { - if next_hops.get(&peer.cfg.id()).map_or(false, |v| v.len() > 0) { - Some(()) - } else { - None - } - } - _ => None, - }) - .await; - - for ttl in 0..5 { - let msg = RoutedMessageBody::Ping(Ping { nonce: rng.gen(), source: peer.cfg.id() }); - let msg = Box::new(peer.routed_message(msg, peer.cfg.id(), ttl, Some(clock.now_utc()))); - peer.send(PeerMessage::Routed(msg.clone())).await; - // If TTL is <2, then the message will be dropped (at least 2 hops are required). 
- if ttl < 2 { - pm.events - .recv_until(|ev| match ev { - Event::PeerManager(PME::RoutedMessageDropped) => Some(()), - _ => None, - }) - .await; - } else { - let got = peer - .events - .recv_until(|ev| match ev { - peer::testonly::Event::Network(PME::MessageProcessed(PeerMessage::Routed( - msg, - ))) => Some(msg), - _ => None, - }) - .await; - assert_eq!(msg.body, got.body); - assert_eq!(msg.ttl - 1, got.ttl); - } - } -} - -#[tokio::test] -async fn accounts_data_broadcast() { - init_test_logger(); - let mut rng = make_rng(921853233); - let rng = &mut rng; - let mut clock = time::FakeClock::default(); - let chain = Arc::new(data::Chain::make(&mut clock, rng, 10)); - let clock = clock.clock(); - let clock = &clock; - - let mut pm = peer_manager::testonly::start( - clock.clone(), - near_store::db::TestDB::new(), - chain.make_config(rng), - chain.clone(), - ) - .await; - - let take_sync = |ev| match ev { - peer::testonly::Event::Network(PME::MessageProcessed(PeerMessage::SyncAccountsData( - msg, - ))) => Some(msg), - _ => None, - }; - - let data = chain.make_tier1_data(rng, clock); - - // Connect peer, expect initial sync to be empty. - let mut peer1 = - pm.start_inbound(chain.clone(), chain.make_config(rng)).await.handshake(clock).await; - let got1 = peer1.events.recv_until(take_sync).await; - assert_eq!(got1.accounts_data, vec![]); - - // Send some data. It won't be broadcasted back. - let msg = SyncAccountsData { - accounts_data: vec![data[0].clone(), data[1].clone()], - incremental: true, - requesting_full_sync: false, - }; - let want = msg.accounts_data.clone(); - peer1.send(PeerMessage::SyncAccountsData(msg)).await; - pm.wait_for_accounts_data(&want.iter().map(|d| d.into()).collect()).await; - - // Connect another peer and perform initial full sync. 
- let mut peer2 = - pm.start_inbound(chain.clone(), chain.make_config(rng)).await.handshake(clock).await; - let got2 = peer2.events.recv_until(take_sync).await; - assert_eq!(got2.accounts_data.as_set(), want.as_set()); - - // Send a mix of new and old data. Only new data should be broadcasted. - let msg = SyncAccountsData { - accounts_data: vec![data[1].clone(), data[2].clone()], - incremental: true, - requesting_full_sync: false, - }; - let want = vec![data[2].clone()]; - peer1.send(PeerMessage::SyncAccountsData(msg)).await; - let got2 = peer2.events.recv_until(take_sync).await; - assert_eq!(got2.accounts_data.as_set(), want.as_set()); - - // Send a request for a full sync. - let want = vec![data[0].clone(), data[1].clone(), data[2].clone()]; - peer1 - .send(PeerMessage::SyncAccountsData(SyncAccountsData { - accounts_data: vec![], - incremental: true, - requesting_full_sync: true, - })) - .await; - let got1 = peer1.events.recv_until(take_sync).await; - assert_eq!(got1.accounts_data.as_set(), want.as_set()); -} - -fn peer_addrs(vc: &config::ValidatorConfig) -> Vec { - match &vc.endpoints { - config::ValidatorEndpoints::PublicAddrs(peer_addrs) => peer_addrs.clone(), - config::ValidatorEndpoints::TrustedStunServers(_) => { - panic!("tests only support PublicAddrs in validator config") - } - } -} - -// Test with 3 peer managers connected sequentially: 1-2-3 -// All of them are validators. -// No matter what the order of shifting into the epoch, -// all of them should receive all the AccountDatas eventually. 
-#[tokio::test] -async fn accounts_data_gradual_epoch_change() { - init_test_logger(); - let mut rng = make_rng(921853233); - let rng = &mut rng; - let mut clock = time::FakeClock::default(); - let chain = Arc::new(data::Chain::make(&mut clock, rng, 10)); - - let mut pms = vec![]; - for _ in 0..3 { - pms.push( - peer_manager::testonly::start( - clock.clock(), - near_store::db::TestDB::new(), - chain.make_config(rng), - chain.clone(), - ) - .await, - ); - } - - // 0 <-> 1 <-> 2 - let pm1 = pms[1].peer_info(); - let pm2 = pms[2].peer_info(); - pms[0].connect_to(&pm1).await; - pms[1].connect_to(&pm2).await; - - // Validator configs. - let vs: Vec<_> = pms.iter().map(|pm| pm.cfg.validator.clone().unwrap()).collect(); - - // For every order of nodes. - for ids in (0..pms.len()).permutations(3) { - // Construct ChainInfo for a new epoch, - // with tier1_accounts containing all validators. - let e = data::make_epoch_id(rng); - let mut chain_info = chain.get_chain_info(); - chain_info.tier1_accounts = Arc::new( - vs.iter() - .map(|v| ((e.clone(), v.signer.validator_id().clone()), v.signer.public_key())) - .collect(), - ); - - // Advance epoch in the given order. - for id in ids { - pms[id].set_chain_info(chain_info.clone()).await; - } - - // Wait for data to arrive. - let want = vs - .iter() - .map(|v| NormalAccountData { - epoch_id: e.clone(), - account_id: v.signer.validator_id().clone(), - peers: peer_addrs(v), - }) - .collect(); - for pm in &mut pms { - pm.wait_for_accounts_data(&want).await; - } - } -} - -// Test is expected to take ~5s. -// Test with 20 peer managers connected in layers: -// - 1st 5 and 2nd 5 are connected in full bipartite graph. -// - 2nd 5 and 3rd 5 ... -// - 3rd 5 and 4th 5 ... -// All of them are validators. 
-#[tokio::test(flavor = "multi_thread")] -async fn accounts_data_rate_limiting() { - init_test_logger(); - let mut rng = make_rng(921853233); - let rng = &mut rng; - let mut clock = time::FakeClock::default(); - let chain = Arc::new(data::Chain::make(&mut clock, rng, 10)); - - // TODO(gprusak) 10 connections per peer is not much, try to scale up this test 2x (some config - // tweaking might be required). - let n = 4; // layers - let m = 5; // peer managers per layer - let mut pms = vec![]; - for _ in 0..n * m { - let mut cfg = chain.make_config(rng); - cfg.accounts_data_broadcast_rate_limit = demux::RateLimit { qps: 0.5, burst: 1 }; - pms.push( - peer_manager::testonly::start( - clock.clock(), - near_store::db::TestDB::new(), - cfg, - chain.clone(), - ) - .await, - ); - } - // Construct a 4-layer bipartite graph. - let mut connections = 0; - for i in 0..n - 1 { - for j in 0..m { - for k in 0..m { - let pi = pms[(i + 1) * m + k].peer_info(); - pms[i * m + j].connect_to(&pi).await; - connections += 1; - } - } - } - - // Validator configs. - let vs: Vec<_> = pms.iter().map(|pm| pm.cfg.validator.clone().unwrap()).collect(); - - // Construct ChainInfo for a new epoch, - // with tier1_accounts containing all validators. - let e = data::make_epoch_id(rng); - let mut chain_info = chain.get_chain_info(); - chain_info.tier1_accounts = Arc::new( - vs.iter() - .map(|v| ((e.clone(), v.signer.validator_id().clone()), v.signer.public_key())) - .collect(), - ); - - // Advance epoch in random order. - pms.shuffle(rng); - for pm in &mut pms { - pm.set_chain_info(chain_info.clone()).await; - } - - // Capture the event streams at the start, so that we can compute - // the total number of SyncAccountsData messages exchanged in the process. - let events: Vec<_> = pms.iter().map(|pm| pm.events.clone()).collect(); - - // Wait for data to arrive. 
- let want = vs - .iter() - .map(|v| NormalAccountData { - epoch_id: e.clone(), - account_id: v.signer.validator_id().clone(), - peers: peer_addrs(&v), - }) - .collect(); - for pm in &mut pms { - pm.wait_for_accounts_data(&want).await; - } - - // Count the SyncAccountsData messages exchanged. - let mut msgs = 0; - for mut es in events { - while let Some(ev) = es.try_recv() { - if peer_manager::testonly::unwrap_sync_accounts_data_processed(ev).is_some() { - msgs += 1; - } - } - } - - // We expect 3 rounds communication to cover the distance from 1st layer to 4th layer - // and +1 full sync at handshake. - // The communication is bidirectional, which gives 8 messages per connection. - // Then add +50% to accomodate for test execution flakiness (12 messages per connection). - // TODO(gprusak): if the test is still flaky, upgrade FakeClock for stricter flow control. - let want_max = connections * 12; - println!("got {msgs}, want <= {want_max}"); - assert!(msgs <= want_max, "got {msgs} messages, want at most {want_max}"); -} - -#[tokio::test] -async fn connection_spam_security_test() { - init_test_logger(); - let mut rng = make_rng(921853233); - let rng = &mut rng; - let mut clock = time::FakeClock::default(); - let chain = Arc::new(data::Chain::make(&mut clock, rng, 10)); - - let mut cfg = chain.make_config(rng); - // Make sure that connections will never get dropped. - cfg.handshake_timeout = time::Duration::hours(1); - let pm = peer_manager::testonly::start( - clock.clock(), - near_store::db::TestDB::new(), - cfg, - chain.clone(), - ) - .await; - - // Saturate the pending connections limit. - let mut conns = vec![]; - for _ in 0..LIMIT_PENDING_PEERS { - conns.push(pm.start_inbound(chain.clone(), chain.make_config(rng)).await); - } - // Try to establish additional connections. Should fail. 
- for _ in 0..10 { - let conn = pm.start_inbound(chain.clone(), chain.make_config(rng)).await; - assert_eq!( - ClosingReason::TooManyInbound, - conn.manager_fail_handshake(&clock.clock()).await - ); - } - // Terminate the pending connections. Should succeed. - for c in conns { - c.handshake(&clock.clock()).await; - } -} - -#[tokio::test] -async fn loop_connection() { - init_test_logger(); - let mut rng = make_rng(921853233); - let rng = &mut rng; - let mut clock = time::FakeClock::default(); - let chain = Arc::new(data::Chain::make(&mut clock, rng, 10)); - - let pm = peer_manager::testonly::start( - clock.clock(), - near_store::db::TestDB::new(), - chain.make_config(rng), - chain.clone(), - ) - .await; - let mut cfg = chain.make_config(rng); - cfg.node_key = pm.cfg.node_key.clone(); - - // Starting an outbound loop connection should be stopped without sending the handshake. - let conn = pm.start_outbound(chain.clone(), cfg).await; - assert_eq!( - ClosingReason::OutboundNotAllowed(connection::PoolError::LoopConnection), - conn.manager_fail_handshake(&clock.clock()).await - ); - - // An inbound connection pretending to be a loop should be rejected. 
- let stream = tcp::Stream::connect(&pm.peer_info()).await.unwrap(); - let stream_id = stream.id(); - let port = stream.local_addr.port(); - let mut events = pm.events.from_now(); - let mut stream = Stream::new(Some(Encoding::Proto), stream); - stream - .write(&PeerMessage::Handshake(Handshake { - protocol_version: PROTOCOL_VERSION, - oldest_supported_version: PROTOCOL_VERSION, - sender_peer_id: pm.cfg.node_id(), - target_peer_id: pm.cfg.node_id(), - sender_listen_port: Some(port), - sender_chain_info: chain.get_peer_chain_info(), - partial_edge_info: PartialEdgeInfo::new( - &pm.cfg.node_id(), - &pm.cfg.node_id(), - 1, - &pm.cfg.node_key, - ), - })) - .await; - let reason = events - .recv_until(|ev| match ev { - Event::PeerManager(PME::ConnectionClosed(ev)) if ev.stream_id == stream_id => { - Some(ev.reason) - } - Event::PeerManager(PME::HandshakeCompleted(ev)) if ev.stream_id == stream_id => { - panic!("PeerManager accepted the handshake") - } - _ => None, - }) - .await; - assert_eq!( - ClosingReason::RejectedByPeerManager(RegisterPeerError::PoolError( - connection::PoolError::LoopConnection - )), - reason - ); -} - -#[tokio::test] -async fn square() { - init_test_logger(); - let mut rng = make_rng(921853233); - let rng = &mut rng; - let mut clock = time::FakeClock::default(); - let chain = Arc::new(data::Chain::make(&mut clock, rng, 10)); - - tracing::info!(target:"test", "connect 4 nodes in a square"); - let pm0 = start_pm(clock.clock(), TestDB::new(), chain.make_config(rng), chain.clone()).await; - let pm1 = start_pm(clock.clock(), TestDB::new(), chain.make_config(rng), chain.clone()).await; - let pm2 = start_pm(clock.clock(), TestDB::new(), chain.make_config(rng), chain.clone()).await; - let pm3 = start_pm(clock.clock(), TestDB::new(), chain.make_config(rng), chain.clone()).await; - pm0.connect_to(&pm1.peer_info()).await; - pm1.connect_to(&pm2.peer_info()).await; - pm2.connect_to(&pm3.peer_info()).await; - pm3.connect_to(&pm0.peer_info()).await; - let id0 = 
pm0.cfg.node_id(); - let id1 = pm1.cfg.node_id(); - let id2 = pm2.cfg.node_id(); - let id3 = pm3.cfg.node_id(); - - pm0.wait_for_routing_table(&[ - (id1.clone(), vec![id1.clone()]), - (id3.clone(), vec![id3.clone()]), - (id2.clone(), vec![id1.clone(), id3.clone()]), - ]) - .await; - tracing::info!(target:"test","stop {id1}"); - drop(pm1); - tracing::info!(target:"test","wait for {id0} routing table"); - pm0.wait_for_routing_table(&[ - (id3.clone(), vec![id3.clone()]), - (id2.clone(), vec![id3.clone()]), - ]) - .await; - tracing::info!(target:"test","wait for {id2} routing table"); - pm2.wait_for_routing_table(&[ - (id3.clone(), vec![id3.clone()]), - (id0.clone(), vec![id3.clone()]), - ]) - .await; - tracing::info!(target:"test","wait for {id3} routing table"); - pm3.wait_for_routing_table(&[ - (id2.clone(), vec![id2.clone()]), - (id0.clone(), vec![id0.clone()]), - ]) - .await; - drop(pm0); - drop(pm2); - drop(pm3); -} diff --git a/chain/network/src/peer_manager/tests/accounts_data.rs b/chain/network/src/peer_manager/tests/accounts_data.rs new file mode 100644 index 00000000000..f031f7bfcea --- /dev/null +++ b/chain/network/src/peer_manager/tests/accounts_data.rs @@ -0,0 +1,264 @@ +use crate::concurrency::demux; +use crate::config; +use crate::network_protocol::testonly as data; +use crate::network_protocol::{PeerAddr, SyncAccountsData}; +use crate::peer; +use crate::peer_manager; +use crate::peer_manager::peer_manager_actor::Event as PME; +use crate::peer_manager::testonly::NormalAccountData; +use crate::testonly::{make_rng, AsSet as _}; +use crate::time; +use crate::types::PeerMessage; +use itertools::Itertools; +use near_o11y::testonly::init_test_logger; +use pretty_assertions::assert_eq; +use rand::seq::SliceRandom as _; +use std::sync::Arc; + +fn peer_addrs(vc: &config::ValidatorConfig) -> Vec<PeerAddr> { + match &vc.endpoints { + config::ValidatorEndpoints::PublicAddrs(peer_addrs) => peer_addrs.clone(), + config::ValidatorEndpoints::TrustedStunServers(_) => { +
panic!("tests only support PublicAddrs in validator config") + } + } +} + +#[tokio::test] +async fn accounts_data_broadcast() { + init_test_logger(); + let mut rng = make_rng(921853233); + let rng = &mut rng; + let mut clock = time::FakeClock::default(); + let chain = Arc::new(data::Chain::make(&mut clock, rng, 10)); + let clock = clock.clock(); + let clock = &clock; + + let mut pm = peer_manager::testonly::start( + clock.clone(), + near_store::db::TestDB::new(), + chain.make_config(rng), + chain.clone(), + ) + .await; + + let take_sync = |ev| match ev { + peer::testonly::Event::Network(PME::MessageProcessed(PeerMessage::SyncAccountsData( + msg, + ))) => Some(msg), + _ => None, + }; + + let data = chain.make_tier1_data(rng, clock); + + // Connect peer, expect initial sync to be empty. + let mut peer1 = + pm.start_inbound(chain.clone(), chain.make_config(rng)).await.handshake(clock).await; + let got1 = peer1.events.recv_until(take_sync).await; + assert_eq!(got1.accounts_data, vec![]); + + // Send some data. It won't be broadcasted back. + let msg = SyncAccountsData { + accounts_data: vec![data[0].clone(), data[1].clone()], + incremental: true, + requesting_full_sync: false, + }; + let want = msg.accounts_data.clone(); + peer1.send(PeerMessage::SyncAccountsData(msg)).await; + pm.wait_for_accounts_data(&want.iter().map(|d| d.into()).collect()).await; + + // Connect another peer and perform initial full sync. + let mut peer2 = + pm.start_inbound(chain.clone(), chain.make_config(rng)).await.handshake(clock).await; + let got2 = peer2.events.recv_until(take_sync).await; + assert_eq!(got2.accounts_data.as_set(), want.as_set()); + + // Send a mix of new and old data. Only new data should be broadcasted. 
+ let msg = SyncAccountsData { + accounts_data: vec![data[1].clone(), data[2].clone()], + incremental: true, + requesting_full_sync: false, + }; + let want = vec![data[2].clone()]; + peer1.send(PeerMessage::SyncAccountsData(msg)).await; + let got2 = peer2.events.recv_until(take_sync).await; + assert_eq!(got2.accounts_data.as_set(), want.as_set()); + + // Send a request for a full sync. + let want = vec![data[0].clone(), data[1].clone(), data[2].clone()]; + peer1 + .send(PeerMessage::SyncAccountsData(SyncAccountsData { + accounts_data: vec![], + incremental: true, + requesting_full_sync: true, + })) + .await; + let got1 = peer1.events.recv_until(take_sync).await; + assert_eq!(got1.accounts_data.as_set(), want.as_set()); +} + +// Test with 3 peer managers connected sequentially: 1-2-3 +// All of them are validators. +// No matter what the order of shifting into the epoch, +// all of them should receive all the AccountDatas eventually. +#[tokio::test] +async fn accounts_data_gradual_epoch_change() { + init_test_logger(); + let mut rng = make_rng(921853233); + let rng = &mut rng; + let mut clock = time::FakeClock::default(); + let chain = Arc::new(data::Chain::make(&mut clock, rng, 10)); + + let mut pms = vec![]; + for _ in 0..3 { + pms.push( + peer_manager::testonly::start( + clock.clock(), + near_store::db::TestDB::new(), + chain.make_config(rng), + chain.clone(), + ) + .await, + ); + } + + // 0 <-> 1 <-> 2 + let pm1 = pms[1].peer_info(); + let pm2 = pms[2].peer_info(); + pms[0].connect_to(&pm1).await; + pms[1].connect_to(&pm2).await; + + // Validator configs. + let vs: Vec<_> = pms.iter().map(|pm| pm.cfg.validator.clone().unwrap()).collect(); + + // For every order of nodes. + for ids in (0..pms.len()).permutations(3) { + // Construct ChainInfo for a new epoch, + // with tier1_accounts containing all validators. 
+ let e = data::make_epoch_id(rng); + let mut chain_info = chain.get_chain_info(); + chain_info.tier1_accounts = Arc::new( + vs.iter() + .map(|v| ((e.clone(), v.signer.validator_id().clone()), v.signer.public_key())) + .collect(), + ); + + // Advance epoch in the given order. + for id in ids { + pms[id].set_chain_info(chain_info.clone()).await; + } + + // Wait for data to arrive. + let want = vs + .iter() + .map(|v| NormalAccountData { + epoch_id: e.clone(), + account_id: v.signer.validator_id().clone(), + peers: peer_addrs(v), + }) + .collect(); + for pm in &mut pms { + pm.wait_for_accounts_data(&want).await; + } + } +} + +// Test is expected to take ~5s. +// Test with 20 peer managers connected in layers: +// - 1st 5 and 2nd 5 are connected in full bipartite graph. +// - 2nd 5 and 3rd 5 ... +// - 3rd 5 and 4th 5 ... +// All of them are validators. +#[tokio::test(flavor = "multi_thread")] +async fn accounts_data_rate_limiting() { + init_test_logger(); + let mut rng = make_rng(921853233); + let rng = &mut rng; + let mut clock = time::FakeClock::default(); + let chain = Arc::new(data::Chain::make(&mut clock, rng, 10)); + + // TODO(gprusak) 10 connections per peer is not much, try to scale up this test 2x (some config + // tweaking might be required). + let n = 4; // layers + let m = 5; // peer managers per layer + let mut pms = vec![]; + for _ in 0..n * m { + let mut cfg = chain.make_config(rng); + cfg.accounts_data_broadcast_rate_limit = demux::RateLimit { qps: 0.5, burst: 1 }; + pms.push( + peer_manager::testonly::start( + clock.clock(), + near_store::db::TestDB::new(), + cfg, + chain.clone(), + ) + .await, + ); + } + // Construct a 4-layer bipartite graph. + let mut connections = 0; + for i in 0..n - 1 { + for j in 0..m { + for k in 0..m { + let pi = pms[(i + 1) * m + k].peer_info(); + pms[i * m + j].connect_to(&pi).await; + connections += 1; + } + } + } + + // Validator configs. 
+ let vs: Vec<_> = pms.iter().map(|pm| pm.cfg.validator.clone().unwrap()).collect(); + + // Construct ChainInfo for a new epoch, + // with tier1_accounts containing all validators. + let e = data::make_epoch_id(rng); + let mut chain_info = chain.get_chain_info(); + chain_info.tier1_accounts = Arc::new( + vs.iter() + .map(|v| ((e.clone(), v.signer.validator_id().clone()), v.signer.public_key())) + .collect(), + ); + + // Advance epoch in random order. + pms.shuffle(rng); + for pm in &mut pms { + pm.set_chain_info(chain_info.clone()).await; + } + + // Capture the event streams at the start, so that we can compute + // the total number of SyncAccountsData messages exchanged in the process. + let events: Vec<_> = pms.iter().map(|pm| pm.events.clone()).collect(); + + // Wait for data to arrive. + let want = vs + .iter() + .map(|v| NormalAccountData { + epoch_id: e.clone(), + account_id: v.signer.validator_id().clone(), + peers: peer_addrs(&v), + }) + .collect(); + for pm in &mut pms { + pm.wait_for_accounts_data(&want).await; + } + + // Count the SyncAccountsData messages exchanged. + let mut msgs = 0; + for mut es in events { + while let Some(ev) = es.try_recv() { + if peer_manager::testonly::unwrap_sync_accounts_data_processed(ev).is_some() { + msgs += 1; + } + } + } + + // We expect 3 rounds of communication to cover the distance from 1st layer to 4th layer + // and +1 full sync at handshake. + // The communication is bidirectional, which gives 8 messages per connection. + // Then add +50% to accommodate for test execution flakiness (12 messages per connection). + // TODO(gprusak): if the test is still flaky, upgrade FakeClock for stricter flow control.
+ let want_max = connections * 12; + println!("got {msgs}, want <= {want_max}"); + assert!(msgs <= want_max, "got {msgs} messages, want at most {want_max}"); +} diff --git a/chain/network/src/peer_manager/tests/connection_pool.rs b/chain/network/src/peer_manager/tests/connection_pool.rs new file mode 100644 index 00000000000..3177457c132 --- /dev/null +++ b/chain/network/src/peer_manager/tests/connection_pool.rs @@ -0,0 +1,121 @@ +use crate::network_protocol::testonly as data; +use crate::network_protocol::PeerMessage; +use crate::network_protocol::{Encoding, Handshake, PartialEdgeInfo}; +use crate::peer::peer_actor::ClosingReason; +use crate::peer_manager; +use crate::peer_manager::connection; +use crate::peer_manager::network_state::LIMIT_PENDING_PEERS; +use crate::peer_manager::peer_manager_actor::Event as PME; +use crate::peer_manager::testonly::Event; +use crate::private_actix::RegisterPeerError; +use crate::tcp; +use crate::testonly::make_rng; +use crate::testonly::stream::Stream; +use crate::time; +use near_o11y::testonly::init_test_logger; +use near_primitives::version::PROTOCOL_VERSION; +use std::sync::Arc; + +#[tokio::test] +async fn connection_spam_security_test() { + init_test_logger(); + let mut rng = make_rng(921853233); + let rng = &mut rng; + let mut clock = time::FakeClock::default(); + let chain = Arc::new(data::Chain::make(&mut clock, rng, 10)); + + let mut cfg = chain.make_config(rng); + // Make sure that connections will never get dropped. + cfg.handshake_timeout = time::Duration::hours(1); + let pm = peer_manager::testonly::start( + clock.clock(), + near_store::db::TestDB::new(), + cfg, + chain.clone(), + ) + .await; + + // Saturate the pending connections limit. + let mut conns = vec![]; + for _ in 0..LIMIT_PENDING_PEERS { + conns.push(pm.start_inbound(chain.clone(), chain.make_config(rng)).await); + } + // Try to establish additional connections. Should fail. 
+ for _ in 0..10 { + let conn = pm.start_inbound(chain.clone(), chain.make_config(rng)).await; + assert_eq!( + ClosingReason::TooManyInbound, + conn.manager_fail_handshake(&clock.clock()).await + ); + } + // Terminate the pending connections. Should succeed. + for c in conns { + c.handshake(&clock.clock()).await; + } +} + +#[tokio::test] +async fn loop_connection() { + init_test_logger(); + let mut rng = make_rng(921853233); + let rng = &mut rng; + let mut clock = time::FakeClock::default(); + let chain = Arc::new(data::Chain::make(&mut clock, rng, 10)); + + let pm = peer_manager::testonly::start( + clock.clock(), + near_store::db::TestDB::new(), + chain.make_config(rng), + chain.clone(), + ) + .await; + let mut cfg = chain.make_config(rng); + cfg.node_key = pm.cfg.node_key.clone(); + + // Starting an outbound loop connection should be stopped without sending the handshake. + let conn = pm.start_outbound(chain.clone(), cfg).await; + assert_eq!( + ClosingReason::OutboundNotAllowed(connection::PoolError::LoopConnection), + conn.manager_fail_handshake(&clock.clock()).await + ); + + // An inbound connection pretending to be a loop should be rejected. 
+ let stream = tcp::Stream::connect(&pm.peer_info()).await.unwrap(); + let stream_id = stream.id(); + let port = stream.local_addr.port(); + let mut events = pm.events.from_now(); + let mut stream = Stream::new(Some(Encoding::Proto), stream); + stream + .write(&PeerMessage::Handshake(Handshake { + protocol_version: PROTOCOL_VERSION, + oldest_supported_version: PROTOCOL_VERSION, + sender_peer_id: pm.cfg.node_id(), + target_peer_id: pm.cfg.node_id(), + sender_listen_port: Some(port), + sender_chain_info: chain.get_peer_chain_info(), + partial_edge_info: PartialEdgeInfo::new( + &pm.cfg.node_id(), + &pm.cfg.node_id(), + 1, + &pm.cfg.node_key, + ), + })) + .await; + let reason = events + .recv_until(|ev| match ev { + Event::PeerManager(PME::ConnectionClosed(ev)) if ev.stream_id == stream_id => { + Some(ev.reason) + } + Event::PeerManager(PME::HandshakeCompleted(ev)) if ev.stream_id == stream_id => { + panic!("PeerManager accepted the handshake") + } + _ => None, + }) + .await; + assert_eq!( + ClosingReason::RejectedByPeerManager(RegisterPeerError::PoolError( + connection::PoolError::LoopConnection + )), + reason + ); +} diff --git a/chain/network/src/peer_manager/tests/mod.rs b/chain/network/src/peer_manager/tests/mod.rs new file mode 100644 index 00000000000..f555e7d18f6 --- /dev/null +++ b/chain/network/src/peer_manager/tests/mod.rs @@ -0,0 +1,4 @@ +mod accounts_data; +mod connection_pool; +mod nonce; +mod routing; diff --git a/chain/network/src/peer_manager/tests/nonce.rs b/chain/network/src/peer_manager/tests/nonce.rs new file mode 100644 index 00000000000..f06c9b537ff --- /dev/null +++ b/chain/network/src/peer_manager/tests/nonce.rs @@ -0,0 +1,77 @@ +use crate::network_protocol::testonly as data; +use crate::network_protocol::{Encoding, EDGE_MIN_TIMESTAMP_NONCE}; +use crate::peer; +use crate::peer_manager; +use crate::tcp; +use crate::testonly::make_rng; +use crate::time; +use near_o11y::testonly::init_test_logger; +use std::sync::Arc; + +// Nonces must be odd (as 
even ones are reserved for tombstones). +fn to_active_nonce(timestamp: time::Utc) -> u64 { + let value = timestamp.unix_timestamp() as u64; + if value % 2 == 0 { + value + 1 + } else { + value + } +} + +// Test connecting to peer manager with timestamp-like nonces. +#[tokio::test] +async fn test_nonces() { + init_test_logger(); + let mut rng = make_rng(921853233); + let rng = &mut rng; + let mut clock = time::FakeClock::new(*EDGE_MIN_TIMESTAMP_NONCE + time::Duration::days(2)); + let chain = Arc::new(data::Chain::make(&mut clock, rng, 10)); + + // Start a PeerManager and connect a peer to it. + let pm = peer_manager::testonly::start( + clock.clock(), + near_store::db::TestDB::new(), + chain.make_config(rng), + chain.clone(), + ) + .await; + + let test_cases = [ + // Try to connect with peer with a valid nonce (current timestamp). + (Some(to_active_nonce(clock.now_utc())), true, "current timestamp"), + // Now try the peer with invalid timestamp (in the past) + (Some(to_active_nonce(clock.now_utc() - time::Duration::days(1))), false, "past timestamp"), + // Now try the peer with invalid timestamp (in the future) + ( + Some(to_active_nonce(clock.now_utc() + time::Duration::days(1))), + false, + "future timestamp", + ), + (Some(u64::MAX), false, "u64 max"), + (Some(i64::MAX as u64), false, "i64 max"), + (Some((i64::MAX - 1) as u64), false, "i64 max - 1"), + (Some(253402300799), false, "Max time"), + (Some(253402300799 + 2), false, "Over max time"), + //(Some(0), false, "Nonce 0"), + (None, true, "Nonce 1"), + ]; + + for test in test_cases { + println!("Running test {:?}", test.2); + let cfg = peer::testonly::PeerConfig { + network: chain.make_config(rng), + chain: chain.clone(), + peers: vec![], + force_encoding: Some(Encoding::Proto), + // Connect with nonce equal to unix timestamp + nonce: test.0, + }; + let stream = tcp::Stream::connect(&pm.peer_info()).await.unwrap(); + let mut peer = peer::testonly::PeerHandle::start_endpoint(clock.clock(), cfg, stream).await; + if 
test.1 { + peer.complete_handshake().await; + } else { + peer.fail_handshake().await; + } + } +} diff --git a/chain/network/src/peer_manager/tests/routing.rs b/chain/network/src/peer_manager/tests/routing.rs new file mode 100644 index 00000000000..36ec4c59b09 --- /dev/null +++ b/chain/network/src/peer_manager/tests/routing.rs @@ -0,0 +1,309 @@ +use crate::network_protocol::testonly as data; +use crate::network_protocol::{Edge, Encoding, Ping, RoutedMessageBody, RoutingTableUpdate}; +use crate::peer; +use crate::peer_manager; +use crate::peer_manager::peer_manager_actor::Event as PME; +use crate::peer_manager::testonly::start as start_pm; +use crate::peer_manager::testonly::Event; +use crate::tcp; +use crate::testonly::make_rng; +use crate::time; +use crate::types::PeerMessage; +use near_o11y::testonly::init_test_logger; +use near_store::db::TestDB; +use pretty_assertions::assert_eq; +use rand::Rng as _; +use std::collections::HashSet; +use std::sync::Arc; + +// Test that TTL is handled properly. +#[tokio::test] +async fn ttl() { + init_test_logger(); + let mut rng = make_rng(921853233); + let rng = &mut rng; + let mut clock = time::FakeClock::default(); + let chain = Arc::new(data::Chain::make(&mut clock, rng, 10)); + let mut pm = peer_manager::testonly::start( + clock.clock(), + near_store::db::TestDB::new(), + chain.make_config(rng), + chain.clone(), + ) + .await; + let cfg = peer::testonly::PeerConfig { + network: chain.make_config(rng), + chain, + peers: vec![], + force_encoding: Some(Encoding::Proto), + nonce: None, + }; + let stream = tcp::Stream::connect(&pm.peer_info()).await.unwrap(); + let mut peer = peer::testonly::PeerHandle::start_endpoint(clock.clock(), cfg, stream).await; + peer.complete_handshake().await; + // Wait for the peer manager to compute the routing table. + // TODO(gprusak): probably extract it to a separate function when migrating other tests from + // integration-tests to near_network.
+ pm.events + .recv_until(|ev| match ev { + Event::PeerManager(PME::RoutingTableUpdate { next_hops, .. }) => { + if next_hops.get(&peer.cfg.id()).map_or(false, |v| v.len() > 0) { + Some(()) + } else { + None + } + } + _ => None, + }) + .await; + + for ttl in 0..5 { + let msg = RoutedMessageBody::Ping(Ping { nonce: rng.gen(), source: peer.cfg.id() }); + let msg = Box::new(peer.routed_message(msg, peer.cfg.id(), ttl, Some(clock.now_utc()))); + peer.send(PeerMessage::Routed(msg.clone())).await; + // If TTL is <2, then the message will be dropped (at least 2 hops are required). + if ttl < 2 { + pm.events + .recv_until(|ev| match ev { + Event::PeerManager(PME::RoutedMessageDropped) => Some(()), + _ => None, + }) + .await; + } else { + let got = peer + .events + .recv_until(|ev| match ev { + peer::testonly::Event::Network(PME::MessageProcessed(PeerMessage::Routed( + msg, + ))) => Some(msg), + _ => None, + }) + .await; + assert_eq!(msg.body, got.body); + assert_eq!(msg.ttl - 1, got.ttl); + } + } +} + +// After the initial exchange, all subsequent SyncRoutingTable messages are +// expected to contain only the diff of the known data. 
+#[tokio::test] +async fn repeated_data_in_sync_routing_table() { + init_test_logger(); + let mut rng = make_rng(921853233); + let rng = &mut rng; + let mut clock = time::FakeClock::default(); + let chain = Arc::new(data::Chain::make(&mut clock, rng, 10)); + let pm = peer_manager::testonly::start( + clock.clock(), + near_store::db::TestDB::new(), + chain.make_config(rng), + chain.clone(), + ) + .await; + let cfg = peer::testonly::PeerConfig { + network: chain.make_config(rng), + chain, + peers: vec![], + force_encoding: Some(Encoding::Proto), + nonce: None, + }; + let stream = tcp::Stream::connect(&pm.peer_info()).await.unwrap(); + let mut peer = peer::testonly::PeerHandle::start_endpoint(clock.clock(), cfg, stream).await; + let edge = peer.complete_handshake().await; + + let mut edges_got = HashSet::new(); + let mut edges_want = HashSet::new(); + let mut accounts_got = HashSet::new(); + let mut accounts_want = HashSet::new(); + edges_want.insert(edge); + + // Gradually increment the amount of data in the system and then broadcast it. + for _ in 0..10 { + // Wait for the new data to be broadcasted. + // Note that in the first iteration we expect just 1 edge, without sending anything before. + // It is important because the first SyncRoutingTable contains snapshot of all data known to + // the node (not just the diff), so we expect incremental behavior only after the first + // SyncRoutingTable. + // TODO(gprusak): the first SyncRoutingTable will be delayed, until we replace actix + // internal clock with a fake clock. 
+ while edges_got != edges_want || accounts_got != accounts_want { + match peer.events.recv().await { + peer::testonly::Event::Network(PME::MessageProcessed( + PeerMessage::SyncRoutingTable(got), + )) => { + for a in got.accounts { + assert!(!accounts_got.contains(&a), "repeated broadcast: {a:?}"); + assert!(accounts_want.contains(&a), "unexpected broadcast: {a:?}"); + accounts_got.insert(a); + } + for e in got.edges { + assert!(!edges_got.contains(&e), "repeated broadcast: {e:?}"); + assert!(edges_want.contains(&e), "unexpected broadcast: {e:?}"); + edges_got.insert(e); + } + } + // Ignore other messages. + _ => {} + } + } + // Add more data. + let signer = data::make_signer(rng); + edges_want.insert(data::make_edge(&peer.cfg.signer(), &signer)); + accounts_want.insert(data::make_announce_account(rng)); + // Send all the data created so far. PeerManager is expected to discard the duplicates. + peer.send(PeerMessage::SyncRoutingTable(RoutingTableUpdate { + edges: edges_want.iter().cloned().collect(), + accounts: accounts_want.iter().cloned().collect(), + })) + .await; + } +} + +/// Waits for SyncRoutingTable messages until all edges from `want` arrive. +/// Panics if any other edges arrive. +async fn wait_for_edges(peer: &mut peer::testonly::PeerHandle, want: &HashSet<Edge>) { + let mut got = HashSet::new(); + while &got != want { + match peer.events.recv().await { + peer::testonly::Event::Network(PME::MessageProcessed( + PeerMessage::SyncRoutingTable(msg), + )) => { + got.extend(msg.edges); + assert!(want.is_superset(&got)); + } + // Ignore other messages. + _ => {} + } + } +} + +// After each handshake a full sync of routing table is performed with the peer. +// After a restart, all the edges reside in storage. The node shouldn't broadcast +// edges which it learned about before the restart. +// This test takes ~6s because of delays enforced in the PeerManager.
+#[tokio::test] +async fn no_edge_broadcast_after_restart() { + init_test_logger(); + let mut rng = make_rng(921853233); + let rng = &mut rng; + let mut clock = time::FakeClock::default(); + let chain = Arc::new(data::Chain::make(&mut clock, rng, 10)); + + let mut total_edges = HashSet::new(); + let store = near_store::db::TestDB::new(); + + for i in 0..3 { + println!("iteration {i}"); + // Start a PeerManager and connect a peer to it. + let pm = peer_manager::testonly::start( + clock.clock(), + store.clone(), + chain.make_config(rng), + chain.clone(), + ) + .await; + let cfg = peer::testonly::PeerConfig { + network: chain.make_config(rng), + chain: chain.clone(), + peers: vec![], + force_encoding: Some(Encoding::Proto), + nonce: None, + }; + let stream = tcp::Stream::connect(&pm.peer_info()).await.unwrap(); + let mut peer = peer::testonly::PeerHandle::start_endpoint(clock.clock(), cfg, stream).await; + let edge = peer.complete_handshake().await; + + // Receive the initial sync, which will consist just of the current edge: + // - the disconnected edges from the previous iterations are not loaded yet. + // - the local edges weren't stored at all. + tracing::info!(target: "test", "wait_for_edges()"); + wait_for_edges(&mut peer, &[edge.clone()].into()).await; + + // Create a bunch of fresh unreachable edges, then send all the edges created so far. + let fresh_edges: HashSet<_> = [ + data::make_edge(&data::make_signer(rng), &data::make_signer(rng)), + data::make_edge(&data::make_signer(rng), &data::make_signer(rng)), + data::make_edge_tombstone(&data::make_signer(rng), &data::make_signer(rng)), + ] + .into(); + total_edges.extend(fresh_edges.clone()); + // We capture the events starting here to record all the edge prunings after the + // SyncRoutingTable below is processed.
+ let mut events = pm.events.from_now(); + peer.send(PeerMessage::SyncRoutingTable(RoutingTableUpdate { + edges: total_edges.iter().cloned().collect::<Vec<_>>(), + accounts: vec![], + })) + .await; + + // Wait for the fresh edges to be broadcasted back. + tracing::info!(target: "test", "wait_for_edges()"); + wait_for_edges(&mut peer, &fresh_edges).await; + + // Wait for all the disconnected edges created so far to be saved to storage. + tracing::info!(target: "test", "wait for pruning"); + let mut pruned = HashSet::new(); + while pruned != total_edges { + match events.recv().await { + Event::PeerManager(PME::RoutingTableUpdate { pruned_edges, .. }) => { + pruned.extend(pruned_edges) + } + _ => {} + } + } + } +} + +#[tokio::test] +async fn square() { + init_test_logger(); + let mut rng = make_rng(921853233); + let rng = &mut rng; + let mut clock = time::FakeClock::default(); + let chain = Arc::new(data::Chain::make(&mut clock, rng, 10)); + + tracing::info!(target:"test", "connect 4 nodes in a square"); + let pm0 = start_pm(clock.clock(), TestDB::new(), chain.make_config(rng), chain.clone()).await; + let pm1 = start_pm(clock.clock(), TestDB::new(), chain.make_config(rng), chain.clone()).await; + let pm2 = start_pm(clock.clock(), TestDB::new(), chain.make_config(rng), chain.clone()).await; + let pm3 = start_pm(clock.clock(), TestDB::new(), chain.make_config(rng), chain.clone()).await; + pm0.connect_to(&pm1.peer_info()).await; + pm1.connect_to(&pm2.peer_info()).await; + pm2.connect_to(&pm3.peer_info()).await; + pm3.connect_to(&pm0.peer_info()).await; + let id0 = pm0.cfg.node_id(); + let id1 = pm1.cfg.node_id(); + let id2 = pm2.cfg.node_id(); + let id3 = pm3.cfg.node_id(); + + pm0.wait_for_routing_table(&[ + (id1.clone(), vec![id1.clone()]), + (id3.clone(), vec![id3.clone()]), + (id2.clone(), vec![id1.clone(), id3.clone()]), + ]) + .await; + tracing::info!(target:"test","stop {id1}"); + drop(pm1); + tracing::info!(target:"test","wait for {id0} routing table"); +
pm0.wait_for_routing_table(&[ + (id3.clone(), vec![id3.clone()]), + (id2.clone(), vec![id3.clone()]), + ]) + .await; + tracing::info!(target:"test","wait for {id2} routing table"); + pm2.wait_for_routing_table(&[ + (id3.clone(), vec![id3.clone()]), + (id0.clone(), vec![id3.clone()]), + ]) + .await; + tracing::info!(target:"test","wait for {id3} routing table"); + pm3.wait_for_routing_table(&[ + (id2.clone(), vec![id2.clone()]), + (id0.clone(), vec![id0.clone()]), + ]) + .await; + drop(pm0); + drop(pm2); + drop(pm3); +} From 41610119eec3a54560fcc4ee5d1dc6b6aa93b6ca Mon Sep 17 00:00:00 2001 From: pompon0 Date: Thu, 27 Oct 2022 13:38:45 +0200 Subject: [PATCH 039/103] Removed the delay before the initial SyncRoutingTable. (#7940) It was unnecessary and confusing. --- chain/network/src/peer/peer_actor.rs | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/chain/network/src/peer/peer_actor.rs b/chain/network/src/peer/peer_actor.rs index f17e0a4c2c4..4859897f1f1 100644 --- a/chain/network/src/peer/peer_actor.rs +++ b/chain/network/src/peer/peer_actor.rs @@ -60,9 +60,6 @@ const ROUTED_MESSAGE_CACHE_SIZE: usize = 1000; /// Duplicated messages will be dropped if routed through the same peer multiple times. const DROP_DUPLICATED_MESSAGES_PERIOD: time::Duration = time::Duration::milliseconds(50); -// TODO(gprusak): this delay is unnecessary, drop it. -const WAIT_FOR_SYNC_DELAY: time::Duration = time::Duration::milliseconds(1_000); - #[derive(Debug, Clone, PartialEq, Eq)] pub struct ConnectionClosedEvent { pub(crate) stream_id: tcp::StreamId, @@ -589,12 +586,8 @@ impl PeerActor { RoutingTableUpdate::from_edges(vec![conn.edge.clone()]), ))); } - ctx.spawn(wrap_future(async { - tokio::time::sleep(WAIT_FOR_SYNC_DELAY.try_into().unwrap()).await; - }).map(|_,act:&mut Self,_|{ - // Sync the RoutingTable. - act.sync_routing_table(); - })); + // Sync the RoutingTable. + act.sync_routing_table(); // Exchange peers periodically. 
ctx.spawn(wrap_future({ let conn = conn.clone(); From 156a6fc916061b297115aae80c2425fce8b15825 Mon Sep 17 00:00:00 2001 From: mm-near <91919554+mm-near@users.noreply.github.com> Date: Thu, 27 Oct 2022 13:53:59 +0200 Subject: [PATCH 040/103] Added test for genesis hash (#7909) Added a test to verify that genesis hash doesn't change. --- nearcore/src/runtime/mod.rs | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/nearcore/src/runtime/mod.rs b/nearcore/src/runtime/mod.rs index f18f5f26b72..70492930b3c 100644 --- a/nearcore/src/runtime/mod.rs +++ b/nearcore/src/runtime/mod.rs @@ -1634,6 +1634,7 @@ impl node_runtime::adapter::ViewRuntimeAdapter for NightshadeRuntime { mod test { use std::collections::BTreeSet; + use near_chain::{Chain, ChainGenesis}; use near_primitives::types::validator_stake::ValidatorStake; use num_rational::Ratio; @@ -3250,4 +3251,40 @@ mod test { let view_state_value = view_state.get(&key).unwrap().unwrap(); assert_eq!(state_value, view_state_value); } + + /// Check that mainnet genesis hash still matches, to make sure that we're still backwards compatible. + #[test] + fn test_genesis_hash() { + let genesis = near_mainnet_res::mainnet_genesis(); + let chain_genesis = ChainGenesis::new(&genesis); + let store = near_store::test_utils::create_test_store(); + + let tempdir = tempfile::tempdir().unwrap(); + let runtime = Arc::new(NightshadeRuntime::test_with_runtime_config_store( + tempdir.path(), + store.clone(), + &genesis, + TrackedConfig::new_empty(), + RuntimeConfigStore::new(None), + )); + + let block = Chain::make_genesis_block(&*runtime, &chain_genesis).unwrap(); + assert_eq!( + block.header().hash().to_string(), + "EPnLgE7iEq9s7yTkos96M3cWymH5avBAPm3qx3NXqR8H" + ); + + let epoch_manager = EpochManager::new_from_genesis_config(store, &genesis.config).unwrap(); + let epoch_info = epoch_manager.get_epoch_info(&EpochId::default()).unwrap(); + // Verify the order of the block producers. 
+ assert_eq!( + [ + 1, 0, 1, 0, 0, 3, 3, 2, 2, 3, 0, 2, 0, 0, 1, 1, 1, 1, 3, 2, 3, 2, 0, 3, 3, 3, 0, 3, + 1, 3, 1, 0, 1, 2, 3, 0, 1, 0, 0, 0, 2, 2, 2, 3, 3, 3, 3, 1, 2, 0, 1, 0, 1, 0, 3, 2, + 1, 2, 0, 1, 3, 3, 1, 2, 1, 2, 1, 0, 2, 3, 1, 2, 1, 2, 3, 2, 0, 3, 3, 2, 0, 0, 2, 3, + 0, 3, 0, 2, 3, 1, 1, 2, 1, 0, 1, 2, 2, 1, 2, 0 + ], + epoch_info.block_producers_settlement() + ); + } } From 7c876a7d22915398d684222a097b2604f678c86b Mon Sep 17 00:00:00 2001 From: Marcelo Diop-Gonzalez Date: Thu, 27 Oct 2022 10:09:13 -0400 Subject: [PATCH 041/103] feat: add a neard amend-genesis command (#7939) In mocknet tests, currently `create_genesis_file()` is used to add validators and miscellaneous records to the genesis state. It has worked so far, but when trying to use it on the latest mainnet state to test out the `tools/mirror` code, it has trouble handling the 28G of data, since it reads it all into memory. So this PR adds a command to neard that will do the exact same job, but using a small amount of memory and a good amount faster. This also removes a bottleneck that previously made us run mocknet tests on machines with more memory than the recommended minimums, so we can consider changing that now if we want. The goal is for nothing about `pytest/tests/mocknet/load_test_spoon.py` to change with this PR. 
The command line interface stays the same, and we detect whether the mocknet instances have a `neard` version with this subcommand and use it if so, producing the same records (although not exactly the same, as here we attempt to write a real storage_usage field) --- Cargo.lock | 26 + neard/Cargo.toml | 1 + neard/src/cli.rs | 7 + pytest/lib/mocknet.py | 310 ++++-- .../tests/mocknet/helpers/genesis_updater.py | 58 +- pytest/tools/mirror/test.py | 107 +- tools/amend-genesis/Cargo.toml | 30 + tools/amend-genesis/src/cli.rs | 99 ++ tools/amend-genesis/src/lib.rs | 938 ++++++++++++++++++ 9 files changed, 1410 insertions(+), 166 deletions(-) create mode 100644 tools/amend-genesis/Cargo.toml create mode 100644 tools/amend-genesis/src/cli.rs create mode 100644 tools/amend-genesis/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index ad8490f898a..52ed97cee5c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2818,6 +2818,31 @@ dependencies = [ "once_cell", ] +[[package]] +name = "near-amend-genesis" +version = "0.0.0" +dependencies = [ + "anyhow", + "borsh", + "clap 3.1.18", + "near-chain", + "near-chain-configs", + "near-crypto", + "near-epoch-manager", + "near-network", + "near-primitives", + "near-primitives-core", + "near-store", + "near-test-contracts", + "nearcore", + "node-runtime", + "num-rational", + "serde", + "serde_json", + "tempfile", + "tracing", +] + [[package]] name = "near-cache" version = "0.0.0" @@ -3674,6 +3699,7 @@ dependencies = [ "anyhow", "clap 3.1.18", "futures", + "near-amend-genesis", "near-chain-configs", "near-jsonrpc-primitives", "near-mirror", diff --git a/neard/Cargo.toml b/neard/Cargo.toml index 54d76f808d6..96348758174 100644 --- a/neard/Cargo.toml +++ b/neard/Cargo.toml @@ -32,6 +32,7 @@ tokio.workspace = true tracing.workspace = true nearcore = { path = "../nearcore" } +near-amend-genesis = { path = "../tools/amend-genesis" } near-chain-configs = { path = "../core/chain-configs" } near-jsonrpc-primitives = { path = "../chain/jsonrpc-primitives" 
} near-mirror = { path = "../tools/mirror" } diff --git a/neard/src/cli.rs b/neard/src/cli.rs index fb3d684b663..f7526e91fa1 100644 --- a/neard/src/cli.rs +++ b/neard/src/cli.rs @@ -1,6 +1,7 @@ use crate::log_config_watcher::{LogConfigWatcher, UpdateBehavior}; use anyhow::Context; use clap::{Args, Parser}; +use near_amend_genesis::AmendGenesisCommand; use near_chain_configs::GenesisValidationMode; use near_jsonrpc_primitives::types::light_client::RpcLightClientExecutionProofResponse; use near_mirror::MirrorCommand; @@ -97,6 +98,9 @@ impl NeardCmd { NeardSubCommand::Mirror(cmd) => { cmd.run()?; } + NeardSubCommand::AmendGenesis(cmd) => { + cmd.run()?; + } }; Ok(()) } @@ -189,6 +193,9 @@ pub(super) enum NeardSubCommand { /// Mirror transactions from a source chain to a test chain with state forked /// from it, reproducing traffic and state as closely as possible. Mirror(MirrorCommand), + + /// Amend a genesis/records file created by `dump-state`. + AmendGenesis(AmendGenesisCommand), } #[derive(Parser)] diff --git a/pytest/lib/mocknet.py b/pytest/lib/mocknet.py index 713f8531a90..3053251c4c5 100644 --- a/pytest/lib/mocknet.py +++ b/pytest/lib/mocknet.py @@ -3,6 +3,7 @@ import random import os.path import shlex +import subprocess import tempfile import time @@ -28,6 +29,7 @@ TX_OUT_FILE = '/home/ubuntu/tx_events' WASM_FILENAME = 'simple_contract.wasm' +TREASURY_ACCOUNT = 'test.near' MASTER_ACCOUNT = 'near' SKYWARD_ACCOUNT = 'skyward.near' SKYWARD_TOKEN_ACCOUNT = 'token.skyward.near' @@ -73,6 +75,16 @@ 3760931, 3746129, 3741225, 3727313, 3699201, 3620341 ] +ACCOUNTS = { + TREASURY_ACCOUNT: (10**7) * ONE_NEAR, + MASTER_ACCOUNT: (10**7) * ONE_NEAR, + SKYWARD_ACCOUNT: (10**6) * ONE_NEAR, + TOKEN1_ACCOUNT: (10**6) * ONE_NEAR, + TOKEN2_ACCOUNT: (10**6) * ONE_NEAR, + TOKEN2_OWNER_ACCOUNT: (10**6) * ONE_NEAR, + ACCOUNT1_ACCOUNT: (10**6) * ONE_NEAR, +} + def get_node(hostname): instance_name = hostname @@ -437,6 +449,30 @@ def compress_and_upload(nodes, src_filename, 
dst_filename): nodes) +# check each of /home/ubuntu/neard and /home/ubuntu/neard.upgrade to see +# whether the amend-genesis command is available. If it is, then we'll use that +# to update the genesis files, otherwise we'll use the python create_genesis_file() +# function. We can't just check this individually on each machine since the two +# functions have slightly different behavior (for example, neard amend-genesis will +# set the storage usage fields in account records), so the resulting genesis block +# hashes will not match up. +# +# Return value is None if it's not available, otherwise the path +# to the binary where it should be available +def neard_amend_genesis_path(node): + r = node.machine.run('/home/ubuntu/neard amend-genesis --help', + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL) + if r.exitcode == 0: + return '/home/ubuntu/neard' + r = node.machine.run('/home/ubuntu/neard.upgrade amend-genesis --help', + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL) + if r.exitcode == 0: + return '/home/ubuntu/neard.upgrade' + return None + + # We assume that the nodes already have the .near directory with the files # node_key.json, validator_key.json and config.json. 
def create_and_upload_genesis(validator_nodes, @@ -465,96 +501,28 @@ def create_and_upload_genesis(validator_nodes, config_filename_in = f'/home/ubuntu/.near/{chain_id_in}-genesis/config.json' stamp = time.strftime('%Y%m%d-%H%M%S', time.gmtime()) done_filename = f'/home/ubuntu/genesis_update_done_{stamp}.txt' + neard = neard_amend_genesis_path(validator_nodes[1]) pmap( lambda node: start_genesis_updater( node, 'genesis_updater.py', genesis_filename_in, - '/home/ubuntu/.near/genesis.json', records_filename_in, - '/home/ubuntu/.near/records.json', config_filename_in, - '/home/ubuntu/.near/config.json', chain_id, - validator_node_names, rpc_node_names, done_filename, + records_filename_in, config_filename_in, '/home/ubuntu/.near/', + chain_id, validator_node_names, rpc_node_names, done_filename, epoch_length, node_pks, increasing_stakes, num_seats, - single_shard, all_node_pks, node_ips), + single_shard, all_node_pks, node_ips, neard), validator_nodes + rpc_nodes) pmap(lambda node: wait_genesis_updater_done(node, done_filename), validator_nodes + rpc_nodes) -def create_genesis_file(validator_node_names, - genesis_filename_in, - genesis_filename_out, - records_filename_in, - records_filename_out, - rpc_node_names=None, - chain_id=None, - append=False, - epoch_length=None, - node_pks=None, - increasing_stakes=0.0, - num_seats=None, - single_shard=False): - logger.info( - f'create_genesis_file: validator_node_names: {validator_node_names}') - logger.info(f'create_genesis_file: rpc_node_names: {rpc_node_names}') - with open(genesis_filename_in) as f: - genesis_config = json.load(f) - with open(records_filename_in) as f: - records = json.load(f) +def extra_genesis_records(validator_node_names, rpc_node_names, node_pks, + seen_accounts, num_seats, increasing_stakes): + records = [] VALIDATOR_BALANCE = (10**2) * ONE_NEAR RPC_BALANCE = (10**1) * ONE_NEAR - TREASURY_ACCOUNT = 'test.near' - TREASURY_BALANCE = (10**7) * ONE_NEAR LOAD_TESTER_BALANCE = (10**4) * ONE_NEAR - 
SKYWARD_CONTRACT_BALANCE = (10**6) * ONE_NEAR - TOKEN1_BALANCE = (10**6) * ONE_NEAR - TOKEN2_BALANCE = (10**6) * ONE_NEAR - TOKEN2_OWNER_BALANCE = (10**6) * ONE_NEAR - ACCOUNT1_BALANCE = (10**6) * ONE_NEAR - - if chain_id: - if append: - assert genesis_config[ - 'chain_id'] != chain_id, 'Can only append to the original genesis once' - - genesis_config['chain_id'] = chain_id - - if append: - # Unstake all tokens from all existing accounts. - for record in records: - if 'Account' in record: - account = record['Account'].get('account', {}) - locked = int(account.get('locked', 0)) - if locked > 0: - amount = int(account.get('amount', 0)) - account['amount'] = str(amount + locked) - account['locked'] = 0 - - else: - records = [] - - master_balance = 10**7 - assert master_balance > 0 - accounts = { - TREASURY_ACCOUNT: TREASURY_BALANCE, - MASTER_ACCOUNT: master_balance, - SKYWARD_ACCOUNT: SKYWARD_CONTRACT_BALANCE, - TOKEN1_ACCOUNT: TOKEN1_BALANCE, - TOKEN2_ACCOUNT: TOKEN2_BALANCE, - TOKEN2_OWNER_ACCOUNT: TOKEN2_OWNER_BALANCE, - ACCOUNT1_ACCOUNT: ACCOUNT1_BALANCE - } - seen_accounts = set() - for record in records: - if 'Account' in record: - account_record = record['Account'] - account_id = account_record.get('account_id', '') - if account_id in accounts: - seen_accounts.add(account_id) - account = account_record.get('account', {}) - account['amount'] = str(accounts[account_id]) - - for account_id, balance in accounts.items(): + for account_id, balance in ACCOUNTS.items(): if account_id not in seen_accounts: records.append({ 'Account': { @@ -673,19 +641,137 @@ def create_genesis_file(validator_node_names, } }) - genesis_config['validators'] = [] + validators = [] seats = compute_seats(stakes, num_seats) seats_taken = 0 for seats, staked, account_id in seats: if seats + seats_taken > num_seats: break - genesis_config['validators'].append({ + validators.append({ 'account_id': account_id, 'public_key': PUBLIC_KEY, 'amount': str(staked), }) seats_taken += seats + return 
records, validators + + +def neard_amend_genesis(neard, validator_node_names, genesis_filename_in, + records_filename_in, out_dir, rpc_node_names, chain_id, + epoch_length, node_pks, increasing_stakes, num_seats, + single_shard): + extra_records, validators = extra_genesis_records(validator_node_names, + rpc_node_names, node_pks, + set(), num_seats, + increasing_stakes) + + validators_filename = os.path.join(out_dir, 'validators.json') + extra_records_filename = os.path.join(out_dir, 'extra-records.json') + genesis_filename_out = os.path.join(out_dir, 'genesis.json') + records_filename_out = os.path.join(out_dir, 'records.json') + + with open(validators_filename, 'w') as f: + json.dump(validators, f) + with open(extra_records_filename, 'w') as f: + json.dump(extra_records, f) + + cmd = [ + neard, + 'amend-genesis', + '--genesis-file-in', + genesis_filename_in, + '--records-file-in', + records_filename_in, + '--extra-records', + extra_records_filename, + '--validators', + validators_filename, + '--genesis-file-out', + genesis_filename_out, + '--records-file-out', + records_filename_out, + '--num-seats', + str(int(num_seats)), + '--transaction-validity-period', + str(10**9), + '--protocol-version', + '49', + ] + if chain_id is not None: + cmd.extend(['--chain-id', chain_id]) + if epoch_length is not None: + cmd.extend(['--epoch-length', str(epoch_length)]) + if single_shard: + shard_layout_filename = os.path.join(out_dir, 'shard_layout.json') + with open(shard_layout_filename, 'w') as f: + json.dump({'V0': {'num_shards': 1, 'version': 0}}, f) + + cmd.extend(['--shard-layout-file', shard_layout_filename]) + + subprocess.run(cmd, text=True) + + +def do_create_genesis_file(validator_node_names, + genesis_filename_in, + genesis_filename_out, + records_filename_in, + records_filename_out, + rpc_node_names=None, + chain_id=None, + append=False, + epoch_length=None, + node_pks=None, + increasing_stakes=0.0, + num_seats=None, + single_shard=False): + logger.info( + 
f'create_genesis_file: validator_node_names: {validator_node_names}') + logger.info(f'create_genesis_file: rpc_node_names: {rpc_node_names}') + with open(genesis_filename_in) as f: + genesis_config = json.load(f) + if append: + with open(records_filename_in) as f: + records = json.load(f) + else: + records = [] + + if chain_id: + if append: + assert genesis_config[ + 'chain_id'] != chain_id, 'Can only append to the original genesis once' + + genesis_config['chain_id'] = chain_id + + if append: + # Unstake all tokens from all existing accounts. + for record in records: + if 'Account' in record: + account = record['Account'].get('account', {}) + locked = int(account.get('locked', 0)) + if locked > 0: + amount = int(account.get('amount', 0)) + account['amount'] = str(amount + locked) + account['locked'] = 0 + + seen_accounts = set() + for record in records: + if 'Account' in record: + account_record = record['Account'] + account_id = account_record.get('account_id', '') + if account_id in ACCOUNTS: + seen_accounts.add(account_id) + account = account_record.get('account', {}) + account['amount'] = str(ACCOUNTS[account_id]) + + extra_records, validators = extra_genesis_records(validator_node_names, + rpc_node_names, node_pks, + seen_accounts, num_seats, + increasing_stakes) + + records.extend(extra_records) + + genesis_config['validators'] = validators total_supply = 0 for record in records: account = record.get('Account', {}).get('account', {}) @@ -716,6 +802,34 @@ def create_genesis_file(validator_node_names, json.dump(records, f) +def create_genesis_file(validator_node_names, + genesis_filename_in, + records_filename_in, + out_dir, + rpc_node_names=None, + chain_id=None, + append=False, + epoch_length=None, + node_pks=None, + increasing_stakes=0.0, + num_seats=None, + single_shard=False, + neard=None): + if append and neard is not None: + neard_amend_genesis(neard, validator_node_names, genesis_filename_in, + records_filename_in, out_dir, rpc_node_names, + chain_id, 
epoch_length, node_pks, increasing_stakes, + num_seats, single_shard) + else: + genesis_filename_out = os.path.join(out_dir, 'genesis.json') + records_filename_out = os.path.join(out_dir, 'records.json') + do_create_genesis_file(validator_node_names, genesis_filename_in, + genesis_filename_out, records_filename_in, + records_filename_out, rpc_node_names, chain_id, + append, epoch_length, node_pks, + increasing_stakes, num_seats, single_shard) + + def download_and_read_json(node, filename): tmp_file = tempfile.NamedTemporaryFile(mode='r+', delete=False) node.machine.download(filename, tmp_file.name) @@ -829,19 +943,20 @@ def reset_data(node, retries=0): ) -def start_genesis_updater_script( - script, genesis_filename_in, genesis_filename_out, records_filename_in, - records_filename_out, config_filename_in, config_filename_out, chain_id, - validator_nodes, rpc_nodes, done_filename, epoch_length, node_pks, - increasing_stakes, num_seats, single_shard, all_node_pks, node_ips): +def start_genesis_updater_script(script, genesis_filename_in, + records_filename_in, config_filename_in, + out_dir, chain_id, validator_nodes, rpc_nodes, + done_filename, epoch_length, node_pks, + increasing_stakes, num_seats, single_shard, + all_node_pks, node_ips, neard): cmd = ' '.join([ shlex.quote(str(arg)) for arg in [ 'nohup', './venv/bin/python', script, genesis_filename_in, - genesis_filename_out, records_filename_in, records_filename_out, - config_filename_in, config_filename_out, chain_id, ','.join( - validator_nodes), ','.join(rpc_nodes), done_filename, + records_filename_in, config_filename_in, out_dir, chain_id, + ','.join(validator_nodes), ','.join(rpc_nodes), done_filename, epoch_length, ','.join(node_pks), increasing_stakes, num_seats, - single_shard, ','.join(all_node_pks), ','.join(node_ips) + single_shard, ','.join(all_node_pks), ','.join( + node_ips), neard if neard is not None else 'None' ] ]) return ''' @@ -851,21 +966,18 @@ def start_genesis_updater_script( def 
start_genesis_updater(node, script, genesis_filename_in, - genesis_filename_out, records_filename_in, - records_filename_out, config_filename_in, - config_filename_out, chain_id, validator_nodes, - rpc_nodes, done_filename, epoch_length, node_pks, - increasing_stakes, num_seats, single_shard, - all_node_pks, node_ips): + records_filename_in, config_filename_in, out_dir, + chain_id, validator_nodes, rpc_nodes, done_filename, + epoch_length, node_pks, increasing_stakes, num_seats, + single_shard, all_node_pks, node_ips, neard): logger.info(f'Starting genesis_updater on {node.instance_name}') node.machine.run('bash', input=start_genesis_updater_script( - script, genesis_filename_in, genesis_filename_out, - records_filename_in, records_filename_out, - config_filename_in, config_filename_out, chain_id, - validator_nodes, rpc_nodes, done_filename, - epoch_length, node_pks, increasing_stakes, num_seats, - single_shard, all_node_pks, node_ips)) + script, genesis_filename_in, records_filename_in, + config_filename_in, out_dir, chain_id, validator_nodes, + rpc_nodes, done_filename, epoch_length, node_pks, + increasing_stakes, num_seats, single_shard, + all_node_pks, node_ips, neard)) def start_genesis_update_waiter_script(done_filename): diff --git a/pytest/tests/mocknet/helpers/genesis_updater.py b/pytest/tests/mocknet/helpers/genesis_updater.py index 26d1a46280d..e188ef14d1f 100755 --- a/pytest/tests/mocknet/helpers/genesis_updater.py +++ b/pytest/tests/mocknet/helpers/genesis_updater.py @@ -7,6 +7,7 @@ need at 4 hours. 
""" +import os import pathlib import sys @@ -22,43 +23,43 @@ def str_to_bool(arg): def main(argv): logger.info(argv) - assert len(argv) == 18 + assert len(argv) == 17 genesis_filename_in = argv[1] - genesis_filename_out = argv[2] - records_filename_in = argv[3] - records_filename_out = argv[4] - config_filename_in = argv[5] - config_filename_out = argv[6] + records_filename_in = argv[2] + config_filename_in = argv[3] + out_dir = argv[4] - chain_id = argv[7] + chain_id = argv[5] validator_node_names = None - if argv[8]: - validator_node_names = argv[8].split(',') + if argv[6]: + validator_node_names = argv[6].split(',') rpc_node_names = None - if argv[9]: - rpc_node_names = argv[9].split(',') - done_filename = argv[10] - epoch_length = int(argv[11]) + if argv[7]: + rpc_node_names = argv[7].split(',') + done_filename = argv[8] + epoch_length = int(argv[9]) node_pks = None - if argv[12]: - node_pks = argv[12].split(',') - increasing_stakes = float(argv[13]) - num_seats = float(argv[14]) - single_shard = str_to_bool(argv[15]) + if argv[10]: + node_pks = argv[10].split(',') + increasing_stakes = float(argv[11]) + num_seats = float(argv[12]) + single_shard = str_to_bool(argv[13]) all_node_pks = None - if argv[16]: - all_node_pks = argv[16].split(',') + if argv[14]: + all_node_pks = argv[14].split(',') node_ips = None - if argv[17]: - node_ips = argv[17].split(',') + if argv[15]: + node_ips = argv[15].split(',') + if argv[16].lower() == 'none': + neard = None + else: + neard = argv[16] assert genesis_filename_in - assert genesis_filename_out assert records_filename_in - assert records_filename_out assert config_filename_in - assert config_filename_out + assert out_dir assert chain_id assert validator_node_names assert done_filename @@ -71,9 +72,8 @@ def main(argv): mocknet.create_genesis_file(validator_node_names, genesis_filename_in, - genesis_filename_out, records_filename_in, - records_filename_out, + out_dir, rpc_node_names=rpc_node_names, chain_id=chain_id, 
append=True, @@ -81,7 +81,9 @@ def main(argv): node_pks=node_pks, increasing_stakes=increasing_stakes, num_seats=num_seats, - single_shard=single_shard) + single_shard=single_shard, + neard=neard) + config_filename_out = os.path.join(out_dir, 'config.json') mocknet.update_config_file(config_filename_in, config_filename_out, all_node_pks, node_ips) diff --git a/pytest/tools/mirror/test.py b/pytest/tools/mirror/test.py index b51f9186d97..30ef1da51a7 100755 --- a/pytest/tools/mirror/test.py +++ b/pytest/tools/mirror/test.py @@ -46,17 +46,22 @@ def ordinal_to_port(port, ordinal): return f'0.0.0.0:{port + 10 + ordinal}' +def copy_genesis(home): + shutil.copy(dot_near() / 'test0/forked/genesis.json', home / 'genesis.json') + shutil.copy(dot_near() / 'test0/forked/records.json', home / 'records.json') + + def init_target_dir(neard, home, ordinal, validator_account=None): mkdir_clean(home) try: - subprocess.check_output([neard, '--home', home, 'init'], - stderr=subprocess.STDOUT) + args = [neard, '--home', home, 'init'] + if validator_account is not None: + args.extend(['--account-id', validator_account]) + subprocess.check_output(args, stderr=subprocess.STDOUT) except subprocess.CalledProcessError as e: sys.exit(f'"neard init" command failed: output: {e.stdout}') shutil.copy(dot_near() / 'test0/config.json', home / 'config.json') - shutil.copy(dot_near() / 'test0/forked/genesis.json', home / 'genesis.json') - shutil.copy(dot_near() / 'test0/forked/records.json', home / 'records.json') with open(home / 'config.json', 'r') as f: config = json.load(f) @@ -68,18 +73,6 @@ def init_target_dir(neard, home, ordinal, validator_account=None): if validator_account is None: os.remove(home / 'validator_key.json') - else: - # this key and the suffix -load-test.near are hardcoded in create_genesis_file() - with open(home / 'validator_key.json', 'w') as f: - json.dump( - { - 'account_id': - f'{validator_account + "-load-test.near"}', - 'public_key': - 
'ed25519:76NVkDErhbP1LGrSAf5Db6BsFJ6LBw6YVA4BsfTBohmN', - 'secret_key': - 'ed25519:3cCk8KUWBySGCxBcn1syMoY5u73wx5eaPLRbQcMi23LwBA3aLsqEbA33Ww1bsJaFrchmDciGe9otdn45SrDSkow2' - }, f) def init_target_dirs(neard): @@ -88,15 +81,11 @@ def init_target_dirs(neard): for account_id in TARGET_VALIDATORS: home = dot_near() / f'test_target_{account_id}' - dirs.append(str(home)) + dirs.append(home) init_target_dir(neard, home, ordinal, validator_account=account_id) ordinal += 1 - observer = dot_near() / f'{MIRROR_DIR}/target' - init_target_dir(neard, observer, ordinal, validator_account=None) - shutil.copy(dot_near() / 'test0/output/mirror-secret.json', - observer / 'mirror-secret.json') - return dirs, observer + return dirs def create_forked_chain(config, near_root): @@ -126,24 +115,64 @@ def create_forked_chain(config, near_root): except subprocess.CalledProcessError as e: sys.exit(f'"mirror prepare" command failed: output: {e.stdout}') + dirs = init_target_dirs(neard) + + target_dir = dot_near() / f'{MIRROR_DIR}/target' + init_target_dir(neard, + target_dir, + NUM_VALIDATORS + 1 + len(dirs), + validator_account=None) + shutil.copy(dot_near() / 'test0/output/mirror-secret.json', + target_dir / 'mirror-secret.json') + os.mkdir(dot_near() / 'test0/forked') - genesis_filename_in = dot_near() / 'test0/output/genesis.json' - genesis_filename_out = dot_near() / 'test0/forked/genesis.json' - records_filename_in = dot_near() / 'test0/output/mirror-records.json' - records_filename_out = dot_near() / 'test0/forked/records.json' - create_genesis_file(TARGET_VALIDATORS, - genesis_filename_in=genesis_filename_in, - genesis_filename_out=genesis_filename_out, - records_filename_in=records_filename_in, - records_filename_out=records_filename_out, - rpc_node_names=[], - chain_id='foonet', - append=True, - epoch_length=20, - node_pks=None, - increasing_stakes=0.0, - num_seats=len(TARGET_VALIDATORS)) - return init_target_dirs(neard) + genesis_file_in = dot_near() / 'test0/output/genesis.json' + 
genesis_file_out = dot_near() / 'test0/forked/genesis.json' + records_file_in = dot_near() / 'test0/output/mirror-records.json' + records_file_out = dot_near() / 'test0/forked/records.json' + + validators = [] + for d in dirs: + with open(d / 'validator_key.json') as f: + key = json.load(f) + validators.append({ + 'account_id': key['account_id'], + 'public_key': key['public_key'], + 'amount': '700000000000000' + }) + + validators_file = dot_near() / 'test0/forked/validators.json' + with open(validators_file, 'w') as f: + json.dump(validators, f) + + try: + subprocess.check_output([ + neard, + 'amend-genesis', + '--genesis-file-in', + genesis_file_in, + '--records-file-in', + records_file_in, + '--genesis-file-out', + genesis_file_out, + '--records-file-out', + records_file_out, + '--validators', + validators_file, + '--chain-id', + 'foonet', + '--epoch-length', + '20', + ], + stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as e: + sys.exit(f'"amend-genesis" command failed: output: {e.stdout}') + + for d in dirs: + copy_genesis(d) + copy_genesis(target_dir) + + return [str(d) for d in dirs], target_dir def init_mirror_dir(home, source_boot_node): diff --git a/tools/amend-genesis/Cargo.toml b/tools/amend-genesis/Cargo.toml new file mode 100644 index 00000000000..73383e9a036 --- /dev/null +++ b/tools/amend-genesis/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "near-amend-genesis" +version = "0.0.0" +authors.workspace = true +publish = false +# Please update rust-toolchain.toml as well when changing version here: +rust-version.workspace = true +edition.workspace = true + +[dependencies] +anyhow.workspace = true +borsh.workspace = true +clap.workspace = true +num-rational.workspace = true +serde.workspace = true +serde_json.workspace = true +tempfile.workspace = true +tracing.workspace = true + +near-chain = { path = "../../chain/chain" } +near-chain-configs = { path = "../../core/chain-configs" } +near-crypto = { path = "../../core/crypto" } 
+near-epoch-manager = { path = "../../chain/epoch-manager" } +near-network = { path = "../../chain/network" } +near-primitives = { path = "../../core/primitives" } +near-primitives-core = { path = "../../core/primitives-core" } +near-store = { path = "../../core/store" } +near-test-contracts = { path = "../../runtime/near-test-contracts" } +nearcore = { path = "../../nearcore" } +node-runtime = { path = "../../runtime/runtime" } diff --git a/tools/amend-genesis/src/cli.rs b/tools/amend-genesis/src/cli.rs new file mode 100644 index 00000000000..1821c19fe14 --- /dev/null +++ b/tools/amend-genesis/src/cli.rs @@ -0,0 +1,99 @@ +use clap::Parser; +use near_primitives::types::NumBlocks; +use near_primitives::types::{BlockHeightDelta, NumSeats}; +use near_primitives::version::ProtocolVersion; +use num_rational::Rational32; +use std::path::PathBuf; + +/// Amend a genesis/records file created by `dump-state`. +#[derive(Parser)] +pub struct AmendGenesisCommand { + /// path to the input genesis file + #[clap(long)] + genesis_file_in: PathBuf, + /// path to the output genesis file + #[clap(long)] + genesis_file_out: PathBuf, + /// path to the input records file. Note that right now this must be provided, and + /// this command will not work with a genesis file that itself contains the records + #[clap(long)] + records_file_in: PathBuf, + /// path to the output records file + #[clap(long)] + records_file_out: PathBuf, + /// path to a JSON list of AccountInfos representing the validators to put in the + /// output genesis state. These are JSON maps of the form + /// { + /// "account_id": , + /// "public_key": , + /// "amount": , + /// } + #[clap(long)] + validators: PathBuf, + /// path to extra records to add to the output state. 
Right now only Accounts and AccessKey + /// records are supported, and any added accounts must have zero `code_hash` + #[clap(long)] + extra_records: Option, + /// chain ID to set on the output genesis + #[clap(long)] + chain_id: Option, + /// protocol version to set on the output genesis + #[clap(long)] + protocol_version: Option, + /// num_seats to set in the output genesis file + #[clap(long)] + num_seats: Option, + /// epoch length to set in the output genesis file + #[clap(long)] + epoch_length: Option, + /// transaction_validity_period to set in the output genesis file + #[clap(long)] + transaction_validity_period: Option, + /// block_producer_kickout_threshold to set in the output genesis file + #[clap(long)] + block_producer_kickout_threshold: Option, + /// chunk_producer_kickout_threshold to set in the output genesis file + #[clap(long)] + chunk_producer_kickout_threshold: Option, + /// protocol_reward_rate to set in the output genesis file. Give a ratio here (e.g. "1/10") + #[clap(long)] + protocol_reward_rate: Option, + /// optional file that should contain a JSON-serialized shard layout + #[clap(long)] + shard_layout_file: Option, + /// runtime fees config `num_bytes_account` value. Used to initialize the `storage_usage` field + /// on accounts in the output state + #[clap(long)] + num_bytes_account: Option, + /// runtime fees config `num_extra_bytes_record` value. 
Used to initialize the `storage_usage` field + /// on accounts in the output state + #[clap(long)] + num_extra_bytes_record: Option, +} + +impl AmendGenesisCommand { + pub fn run(self) -> anyhow::Result<()> { + let genesis_changes = crate::GenesisChanges { + chain_id: self.chain_id, + protocol_version: self.protocol_version, + num_seats: self.num_seats, + epoch_length: self.epoch_length, + transaction_validity_period: self.transaction_validity_period, + protocol_reward_rate: self.protocol_reward_rate, + block_producer_kickout_threshold: self.block_producer_kickout_threshold, + chunk_producer_kickout_threshold: self.chunk_producer_kickout_threshold, + }; + crate::amend_genesis( + &self.genesis_file_in, + &self.genesis_file_out, + &self.records_file_in, + &self.records_file_out, + self.extra_records.as_ref(), + &self.validators, + self.shard_layout_file.as_ref(), + &genesis_changes, + self.num_bytes_account.unwrap_or(100), + self.num_extra_bytes_record.unwrap_or(40), + ) + } +} diff --git a/tools/amend-genesis/src/lib.rs b/tools/amend-genesis/src/lib.rs new file mode 100644 index 00000000000..cba822d229f --- /dev/null +++ b/tools/amend-genesis/src/lib.rs @@ -0,0 +1,938 @@ +use anyhow::Context; +use borsh::BorshSerialize; +use near_chain_configs::{Genesis, GenesisValidationMode}; +use near_crypto::PublicKey; +use near_primitives::hash::CryptoHash; +use near_primitives::shard_layout::ShardLayout; +use near_primitives::state_record::StateRecord; +use near_primitives::types::{AccountId, AccountInfo}; +use near_primitives::utils; +use near_primitives::version::ProtocolVersion; +use near_primitives_core::account::{AccessKey, Account}; +use near_primitives_core::types::{Balance, BlockHeightDelta, NumBlocks, NumSeats}; +use num_rational::Rational32; +use serde::ser::{SerializeSeq, Serializer}; +use std::collections::{hash_map, HashMap}; +use std::fs::File; +use std::io::{BufReader, BufWriter}; +use std::path::Path; + +mod cli; + +pub use cli::AmendGenesisCommand; + +// while 
parsing the --extra-records file we will keep track of the records we see for each +// account here, and then at the end figure out what to put in the storage_usage field +#[derive(Debug, Default)] +struct AccountRecords { + account: Option, + // when we parse the validators file, we will set the balance in the account to 0 + // and set this to true so we remember later to set some default value, and if we + // end up seeing the account listed in the input records file, we'll use the total + // given there + amount_needed: bool, + keys: HashMap, +} + +// set the total balance to what's in src, keeping the locked amount the same +// must only be called if self.account.is_some() +fn set_total_balance(dst: &mut Account, src: &Account) { + let total = src.amount() + src.locked(); + if total > dst.locked() { + dst.set_amount(total - dst.locked()); + } +} + +impl AccountRecords { + fn new(amount: Balance, locked: Balance, num_bytes_account: u64) -> Self { + let mut ret = Self::default(); + ret.set_account(amount, locked, num_bytes_account); + ret + } + + fn new_validator(stake: Balance, num_bytes_account: u64) -> Self { + let mut ret = Self::default(); + ret.set_account(0, stake, num_bytes_account); + ret.amount_needed = true; + ret + } + + fn set_account(&mut self, amount: Balance, locked: Balance, num_bytes_account: u64) { + assert!(self.account.is_none()); + let account = Account::new(amount, locked, CryptoHash::default(), num_bytes_account); + self.account = Some(account); + } + + fn update_from_existing(&mut self, existing: &Account) { + match &mut self.account { + Some(account) => { + // an account added in extra_records (or one of the validators) also exists in the original + // records. 
Set the storage usage to reflect whatever's in the original records, and at the + // end we will add to the storage usage with any extra keys added for this account + account.set_storage_usage(existing.storage_usage()); + account.set_code_hash(existing.code_hash()); + if self.amount_needed { + set_total_balance(account, existing); + } + } + None => { + let mut account = existing.clone(); + account.set_amount(account.amount() + account.locked()); + account.set_locked(0); + self.account = Some(account); + } + } + self.amount_needed = false; + } + + fn write_out( + self, + account_id: AccountId, + seq: &mut S, + total_supply: &mut Balance, + num_extra_bytes_record: u64, + ) -> anyhow::Result<()> + where + ::Error: Send + Sync + 'static, + { + match self.account { + Some(mut account) => { + for (public_key, access_key) in self.keys { + let storage_usage = account.storage_usage() + + public_key.len() as u64 + + access_key.try_to_vec().unwrap().len() as u64 + + num_extra_bytes_record; + account.set_storage_usage(storage_usage); + + seq.serialize_element(&StateRecord::AccessKey { + account_id: account_id.clone(), + public_key, + access_key, + })?; + } + if self.amount_needed { + account.set_amount(10_000 * nearcore::config::NEAR_BASE); + } + *total_supply += account.amount() + account.locked(); + seq.serialize_element(&StateRecord::Account { account_id, account })?; + } + None => { + tracing::warn!("access keys for {} were included in --extra-records, but no Account record was found. 
Not adding them to the output", &account_id); + } + } + Ok(()) + } +} + +fn validator_records( + validators: &[AccountInfo], + num_bytes_account: u64, +) -> anyhow::Result> { + let mut records = HashMap::new(); + for AccountInfo { account_id, public_key, amount } in validators.iter() { + let mut r = AccountRecords::new_validator(*amount, num_bytes_account); + r.keys.insert(public_key.clone(), AccessKey::full_access()); + if records.insert(account_id.clone(), r).is_some() { + anyhow::bail!("validator {} specified twice", account_id); + } + } + Ok(records) +} + +fn parse_validators>(path: P) -> anyhow::Result> { + let validators = std::fs::read_to_string(path.as_ref()) + .with_context(|| format!("failed reading from {:?}", path.as_ref()))?; + let validators = serde_json::from_str(&validators) + .with_context(|| format!("failed deserializing from {:?}", path.as_ref()))?; + Ok(validators) +} + +fn parse_extra_records>( + records_file: P, + num_bytes_account: u64, +) -> anyhow::Result> { + let reader = + BufReader::new(File::open(records_file).context("Failed opening --extra-records")?); + let mut records = HashMap::new(); + + let mut result = Ok(()); + near_chain_configs::stream_records_from_file(reader, |r| { + match r { + StateRecord::Account { account_id, account } => { + if account.code_hash() != CryptoHash::default() { + result = Err(anyhow::anyhow!( + "FIXME: accounts in --extra-records with code_hash set not supported" + )); + } + match records.entry(account_id.clone()) { + hash_map::Entry::Vacant(e) => { + let r = AccountRecords::new( + account.amount(), + account.locked(), + num_bytes_account, + ); + e.insert(r); + } + hash_map::Entry::Occupied(mut e) => { + let r = e.get_mut(); + + if r.account.is_some() { + result = Err(anyhow::anyhow!( + "account {} given twice in extra records", + &account_id + )); + } + r.set_account(account.amount(), account.locked(), num_bytes_account); + } + } + } + StateRecord::AccessKey { account_id, public_key, access_key } => { + 
records.entry(account_id).or_default().keys.insert(public_key, access_key); + } + _ => { + result = Err(anyhow::anyhow!( + "FIXME: only Account and AccessKey records are supported in --extra-records" + )); + } + }; + }) + .context("Failed deserializing records from --extra-records")?; + + Ok(records) +} + +fn wanted_records>( + validators: &[AccountInfo], + extra_records: Option

, + num_bytes_account: u64, +) -> anyhow::Result> { + let mut records = validator_records(validators, num_bytes_account)?; + + if let Some(path) = extra_records { + let extra = parse_extra_records(path, num_bytes_account)?; + + for (account_id, account_records) in extra { + match records.entry(account_id) { + hash_map::Entry::Occupied(mut e) => { + let validator_records = e.get_mut(); + + if let Some(account) = &account_records.account { + set_total_balance(validator_records.account.as_mut().unwrap(), account); + validator_records.amount_needed = false; + } + validator_records.keys.extend(account_records.keys); + } + hash_map::Entry::Vacant(e) => { + e.insert(account_records); + } + } + } + } + + Ok(records) +} + +#[derive(Default)] +pub struct GenesisChanges { + pub chain_id: Option, + pub protocol_version: Option, + pub num_seats: Option, + pub epoch_length: Option, + pub transaction_validity_period: Option, + pub protocol_reward_rate: Option, + pub block_producer_kickout_threshold: Option, + pub chunk_producer_kickout_threshold: Option, +} + +/// Amend a genesis/records file created by `dump-state`. +pub fn amend_genesis>( + genesis_file_in: P, + genesis_file_out: P, + records_file_in: P, + records_file_out: P, + extra_records: Option

, + validators: P, + shard_layout_file: Option

, + genesis_changes: &GenesisChanges, + num_bytes_account: u64, + num_extra_bytes_record: u64, +) -> anyhow::Result<()> { + let mut genesis = Genesis::from_file(genesis_file_in, GenesisValidationMode::UnsafeFast); + + let shard_layout = if let Some(path) = shard_layout_file { + let s = std::fs::read_to_string(path).context("failed reading from --shard-layout-file")?; + Some( + serde_json::from_str::(&s) + .context("failed deserializing --shard-layout-file")?, + ) + } else { + None + }; + + let reader = + BufReader::new(File::open(records_file_in).context("Failed opening --records-file-in")?); + let records_out = BufWriter::new( + File::create(records_file_out).context("Failed opening --records-file-out")?, + ); + let mut records_ser = serde_json::Serializer::new(records_out); + let mut records_seq = records_ser.serialize_seq(None).unwrap(); + + let validators = parse_validators(validators)?; + let mut wanted = wanted_records(&validators, extra_records, num_bytes_account)?; + let mut total_supply = 0; + + near_chain_configs::stream_records_from_file(reader, |mut r| { + match &mut r { + StateRecord::AccessKey { account_id, public_key, access_key } => { + if let Some(a) = wanted.get_mut(account_id) { + if let Some(a) = a.keys.remove(public_key) { + *access_key = a; + } + } + records_seq.serialize_element(&r).unwrap(); + } + StateRecord::Account { account_id, account } => { + if let Some(acc) = wanted.get_mut(account_id) { + acc.update_from_existing(account); + } else { + if account.locked() != 0 { + account.set_amount(account.amount() + account.locked()); + account.set_locked(0); + } + total_supply += account.amount() + account.locked(); + records_seq.serialize_element(&r).unwrap(); + } + } + _ => { + records_seq.serialize_element(&r).unwrap(); + } + }; + })?; + + for (account_id, records) in wanted { + records.write_out( + account_id, + &mut records_seq, + &mut total_supply, + num_extra_bytes_record, + )?; + } + + genesis.config.total_supply = total_supply; + // 
TODO: give an option to set this + genesis.config.num_block_producer_seats = validators.len() as NumSeats; + // here we have already checked that there are no duplicate validators in wanted_records() + genesis.config.validators = validators; + if let Some(chain_id) = &genesis_changes.chain_id { + genesis.config.chain_id = chain_id.clone(); + } + if let Some(n) = genesis_changes.num_seats { + genesis.config.num_block_producer_seats = n; + } + if let Some(l) = shard_layout { + genesis.config.avg_hidden_validator_seats_per_shard = + (0..l.num_shards()).map(|_| 0).collect(); + genesis.config.num_block_producer_seats_per_shard = + utils::get_num_seats_per_shard(l.num_shards(), genesis.config.num_block_producer_seats); + genesis.config.shard_layout = l; + } + if let Some(v) = genesis_changes.protocol_version { + genesis.config.protocol_version = v; + } + if let Some(l) = genesis_changes.epoch_length { + genesis.config.epoch_length = l; + } + if let Some(t) = genesis_changes.transaction_validity_period { + genesis.config.transaction_validity_period = t; + } + if let Some(r) = genesis_changes.protocol_reward_rate { + genesis.config.protocol_reward_rate = r; + } + if let Some(t) = genesis_changes.block_producer_kickout_threshold { + genesis.config.block_producer_kickout_threshold = t; + } + if let Some(t) = genesis_changes.chunk_producer_kickout_threshold { + genesis.config.chunk_producer_kickout_threshold = t; + } + genesis.config.protocol_reward_rate = num_rational::Rational32::new(1, 10); + genesis.config.block_producer_kickout_threshold = 10; + genesis.to_file(genesis_file_out); + records_seq.end()?; + Ok(()) +} + +#[cfg(test)] +mod test { + use anyhow::Context; + use near_chain_configs::{get_initial_supply, Genesis, GenesisConfig}; + use near_primitives::hash::CryptoHash; + use near_primitives::shard_layout::ShardLayout; + use near_primitives::state_record::StateRecord; + use near_primitives::time::Clock; + use near_primitives::types::{AccountId, AccountInfo}; + use 
near_primitives::utils; + use near_primitives::version::PROTOCOL_VERSION; + use near_primitives_core::account::{AccessKey, Account}; + use near_primitives_core::types::{Balance, StorageUsage}; + use num_rational::Rational32; + use std::collections::HashSet; + use std::str::FromStr; + use tempfile::NamedTempFile; + + // these (TestAccountInfo, TestStateRecord, and ParsedTestCase) are here so we can + // have all static data in the testcases below + struct TestAccountInfo { + account_id: &'static str, + public_key: &'static str, + amount: Balance, + } + + impl TestAccountInfo { + fn parse(&self) -> AccountInfo { + AccountInfo { + account_id: self.account_id.parse().unwrap(), + public_key: self.public_key.parse().unwrap(), + amount: self.amount, + } + } + } + + enum TestStateRecord { + Account { + account_id: &'static str, + amount: Balance, + locked: Balance, + /// Storage used by the given account, includes account id, this struct, access keys and other data. + storage_usage: StorageUsage, + }, + AccessKey { + account_id: &'static str, + public_key: &'static str, + }, + } + + impl TestStateRecord { + fn parse(&self) -> StateRecord { + match &self { + Self::Account { account_id, amount, locked, storage_usage } => { + let account = + Account::new(*amount, *locked, CryptoHash::default(), *storage_usage); + StateRecord::Account { account_id: account_id.parse().unwrap(), account } + } + Self::AccessKey { account_id, public_key } => StateRecord::AccessKey { + account_id: account_id.parse().unwrap(), + public_key: public_key.parse().unwrap(), + access_key: AccessKey::full_access(), + }, + } + } + } + + struct ParsedTestCase { + genesis: Genesis, + records_file_in: NamedTempFile, + validators_in: Vec, + extra_records: Vec, + wanted_records: Vec, + } + + struct TestCase { + // for convenience, the validators set in the initial genesis file, matching + // the accounts in records_in with nonzero `locked` + initial_validators: &'static [TestAccountInfo], + // records to put in 
the --records-file-in file + records_in: &'static [TestStateRecord], + // account infos to put in the --validators file + validators_in: &'static [TestAccountInfo], + // records to put in the --extra-records file + extra_records: &'static [TestStateRecord], + // the records we want to appear in the output + wanted_records: &'static [TestStateRecord], + } + + fn compare_records( + got_records: Vec, + wanted_records: Vec, + ) -> anyhow::Result<()> { + let mut got_accounts = HashSet::new(); + let mut got_keys = HashSet::new(); + let mut wanted_accounts = HashSet::new(); + let mut wanted_keys = HashSet::new(); + + for r in got_records { + match r { + StateRecord::Account { account_id, account } => { + if !got_accounts.insert(( + account_id.clone(), + account.amount(), + account.locked(), + account.code_hash(), + account.storage_usage(), + )) { + anyhow::bail!("two account records in the output for {}", &account_id); + } + } + StateRecord::AccessKey { account_id, public_key, access_key } => { + if !got_keys.insert((account_id.clone(), public_key.clone(), access_key)) { + anyhow::bail!( + "two access key records in the output for {}, {}", + &account_id, + &public_key + ); + } + } + _ => anyhow::bail!("got an unexpected record in the output: {}", r), + }; + } + for r in wanted_records { + match r { + StateRecord::Account { account_id, account } => { + wanted_accounts.insert(( + account_id, + account.amount(), + account.locked(), + account.code_hash(), + account.storage_usage(), + )); + } + StateRecord::AccessKey { account_id, public_key, access_key } => { + wanted_keys.insert((account_id, public_key, access_key)); + } + _ => anyhow::bail!("got an unexpected record in the output: {}", r), + }; + } + + assert_eq!(got_accounts, wanted_accounts); + assert_eq!(got_keys, wanted_keys); + Ok(()) + } + + impl TestCase { + fn parse(&self) -> anyhow::Result { + let initial_validators = self.initial_validators.iter().map(|v| v.parse()).collect(); + let records_in: Vec<_> = 
self.records_in.iter().map(|r| r.parse()).collect(); + + let num_shards = 4; + let shards = ShardLayout::v1( + (0..num_shards - 1) + .map(|f| AccountId::from_str(format!("shard{}.test.near", f).as_str()).unwrap()) + .collect(), + vec![], + None, + 1, + ); + + let genesis_config = GenesisConfig { + protocol_version: PROTOCOL_VERSION, + genesis_time: Clock::utc(), + chain_id: "rusttestnet".to_string(), + genesis_height: 0, + num_block_producer_seats: nearcore::config::NUM_BLOCK_PRODUCER_SEATS, + num_block_producer_seats_per_shard: utils::get_num_seats_per_shard( + num_shards, + nearcore::config::NUM_BLOCK_PRODUCER_SEATS, + ), + avg_hidden_validator_seats_per_shard: (0..num_shards).map(|_| 0).collect(), + dynamic_resharding: false, + protocol_upgrade_stake_threshold: + nearcore::config::PROTOCOL_UPGRADE_STAKE_THRESHOLD, + protocol_upgrade_num_epochs: nearcore::config::PROTOCOL_UPGRADE_NUM_EPOCHS, + epoch_length: 1000, + gas_limit: nearcore::config::INITIAL_GAS_LIMIT, + gas_price_adjustment_rate: nearcore::config::GAS_PRICE_ADJUSTMENT_RATE, + block_producer_kickout_threshold: + nearcore::config::BLOCK_PRODUCER_KICKOUT_THRESHOLD, + chunk_producer_kickout_threshold: + nearcore::config::CHUNK_PRODUCER_KICKOUT_THRESHOLD, + online_max_threshold: Rational32::new(99, 100), + online_min_threshold: Rational32::new( + nearcore::config::BLOCK_PRODUCER_KICKOUT_THRESHOLD as i32, + 100, + ), + validators: initial_validators, + transaction_validity_period: nearcore::config::TRANSACTION_VALIDITY_PERIOD, + protocol_reward_rate: nearcore::config::PROTOCOL_REWARD_RATE, + max_inflation_rate: nearcore::config::MAX_INFLATION_RATE, + total_supply: get_initial_supply(&records_in), + num_blocks_per_year: nearcore::config::NUM_BLOCKS_PER_YEAR, + protocol_treasury_account: "treasury.near".parse().unwrap(), + fishermen_threshold: nearcore::config::FISHERMEN_THRESHOLD, + shard_layout: shards, + min_gas_price: nearcore::config::MIN_GAS_PRICE, + ..Default::default() + }; + + let mut records_file_in 
= + tempfile::NamedTempFile::new().context("failed creating tmp file")?; + serde_json::to_writer(&mut records_file_in, &records_in) + .context("failed writing to --records-file-in")?; + let genesis = Genesis::new_with_path(genesis_config, records_file_in.path()); + + Ok(ParsedTestCase { + genesis, + records_file_in, + validators_in: self.validators_in.iter().map(|v| v.parse()).collect(), + extra_records: self.extra_records.iter().map(|r| r.parse()).collect(), + wanted_records: self.wanted_records.iter().map(|r| r.parse()).collect(), + }) + } + + // take the records in the test case and write them to temp files, and then call amend_genesis() and + // check that the resulting genesis and records files match what's in self.want_records + // right now we aren't testing that other kinds of records appearing in the input records file + // will make it into the output, but that part is pretty simple + fn run(&self) -> anyhow::Result<()> { + let ParsedTestCase { + genesis, + records_file_in, + validators_in, + extra_records, + wanted_records, + } = self.parse()?; + + let mut genesis_file_in = + tempfile::NamedTempFile::new().context("failed creating tmp file")?; + let mut validators_file = + tempfile::NamedTempFile::new().context("failed creating tmp file")?; + let mut extra_records_file = + tempfile::NamedTempFile::new().context("failed creating tmp file")?; + let genesis_file_out = + tempfile::NamedTempFile::new().context("failed creating tmp file")?; + let records_file_out = + tempfile::NamedTempFile::new().context("failed creating tmp file")?; + + serde_json::to_writer(&mut validators_file, &validators_in) + .context("failed writing to --validators")?; + serde_json::to_writer(&mut extra_records_file, &extra_records) + .context("failed writing to --extra-records")?; + serde_json::to_writer(&mut genesis_file_in, &genesis) + .context("failed writing to --genesis-file-in")?; + + crate::amend_genesis( + genesis_file_in.path(), + genesis_file_out.path(), + 
records_file_in.path(), + records_file_out.path(), + Some(extra_records_file.path()), + validators_file.path(), + None, + &crate::GenesisChanges::default(), + 100, + 40, + ) + .context("amend_genesis() failed")?; + + let got_records = std::fs::read_to_string(records_file_out.path()) + .context("failed reading from --records-file-out")?; + let got_records: Vec = serde_json::from_str(&got_records) + .context("failed deserializing --records-file-out")?; + + compare_records(got_records, wanted_records) + } + } + + static TEST_CASES: &[TestCase] = &[ + // first one adds one validator (foo2), bumps up another's balance (foo0), and adds an extra account (extra-account.near) + TestCase { + initial_validators: &[ + TestAccountInfo { + account_id: "foo0", + public_key: "ed25519:He7QeRuwizNEhBioYG3u4DZ8jWXyETiyNzFD3MkTjDMf", + amount: 1_000_000, + }, + TestAccountInfo { + account_id: "foo1", + public_key: "ed25519:FXXrTXiKWpXj1R6r5fBvMLpstd8gPyrBq3qMByqKVzKF", + amount: 2_000_000, + }, + ], + records_in: &[ + TestStateRecord::Account { + account_id: "foo0", + amount: 1_000_000, + locked: 1_000_000, + storage_usage: 182, + }, + TestStateRecord::AccessKey { + account_id: "foo0", + public_key: "ed25519:He7QeRuwizNEhBioYG3u4DZ8jWXyETiyNzFD3MkTjDMf", + }, + TestStateRecord::Account { + account_id: "foo1", + amount: 1_000_000, + locked: 2_000_000, + storage_usage: 182, + }, + TestStateRecord::AccessKey { + account_id: "foo1", + public_key: "ed25519:FXXrTXiKWpXj1R6r5fBvMLpstd8gPyrBq3qMByqKVzKF", + }, + TestStateRecord::Account { + account_id: "asdf.near", + amount: 1_234_000, + locked: 0, + storage_usage: 182, + }, + TestStateRecord::AccessKey { + account_id: "asdf.near", + public_key: "ed25519:5C66RSJgwK17Yb6VtTbgBCFHDRPzGUd6AAhFdXNvmJuo", + }, + ], + validators_in: &[ + TestAccountInfo { + account_id: "foo0", + public_key: "ed25519:He7QeRuwizNEhBioYG3u4DZ8jWXyETiyNzFD3MkTjDMf", + amount: 1_000_000, + }, + TestAccountInfo { + account_id: "foo1", + public_key: 
"ed25519:FXXrTXiKWpXj1R6r5fBvMLpstd8gPyrBq3qMByqKVzKF", + amount: 2_000_000, + }, + TestAccountInfo { + account_id: "foo2", + public_key: "ed25519:Eo9W44tRMwcYcoua11yM7Xfr1DjgR4EWQFM3RU27MEX8", + amount: 3_000_000, + }, + ], + extra_records: &[ + TestStateRecord::Account { + account_id: "foo0", + amount: 100_000_000, + locked: 50_000_000, + storage_usage: 0, + }, + TestStateRecord::Account { + account_id: "extra-account.near", + amount: 9_000_000, + locked: 0, + storage_usage: 0, + }, + TestStateRecord::AccessKey { + account_id: "extra-account.near", + public_key: "ed25519:BhnQV3oJa8iSQDKDc8gy36TsenaMFmv7qHvcnutuXj33", + }, + ], + wanted_records: &[ + TestStateRecord::Account { + account_id: "foo0", + amount: 149_000_000, + locked: 1_000_000, + storage_usage: 182, + }, + TestStateRecord::AccessKey { + account_id: "foo0", + public_key: "ed25519:He7QeRuwizNEhBioYG3u4DZ8jWXyETiyNzFD3MkTjDMf", + }, + TestStateRecord::Account { + account_id: "foo1", + amount: 1_000_000, + locked: 2_000_000, + storage_usage: 182, + }, + TestStateRecord::AccessKey { + account_id: "foo1", + public_key: "ed25519:FXXrTXiKWpXj1R6r5fBvMLpstd8gPyrBq3qMByqKVzKF", + }, + TestStateRecord::Account { + account_id: "foo2", + amount: 10_000 * nearcore::config::NEAR_BASE, + locked: 3_000_000, + storage_usage: 182, + }, + TestStateRecord::AccessKey { + account_id: "foo2", + public_key: "ed25519:Eo9W44tRMwcYcoua11yM7Xfr1DjgR4EWQFM3RU27MEX8", + }, + TestStateRecord::Account { + account_id: "asdf.near", + amount: 1_234_000, + locked: 0, + storage_usage: 182, + }, + TestStateRecord::AccessKey { + account_id: "asdf.near", + public_key: "ed25519:5C66RSJgwK17Yb6VtTbgBCFHDRPzGUd6AAhFdXNvmJuo", + }, + TestStateRecord::Account { + account_id: "extra-account.near", + amount: 9_000_000, + locked: 0, + storage_usage: 182, + }, + TestStateRecord::AccessKey { + account_id: "extra-account.near", + public_key: "ed25519:BhnQV3oJa8iSQDKDc8gy36TsenaMFmv7qHvcnutuXj33", + }, + ], + }, + // this one changes the validator set 
completely, and adds an extra accounts and keys + TestCase { + initial_validators: &[ + TestAccountInfo { + account_id: "foo0", + public_key: "ed25519:He7QeRuwizNEhBioYG3u4DZ8jWXyETiyNzFD3MkTjDMf", + amount: 1_000_000, + }, + TestAccountInfo { + account_id: "foo1", + public_key: "ed25519:FXXrTXiKWpXj1R6r5fBvMLpstd8gPyrBq3qMByqKVzKF", + amount: 2_000_000, + }, + ], + validators_in: &[ + TestAccountInfo { + account_id: "foo2", + public_key: "ed25519:He7QeRuwizNEhBioYG3u4DZ8jWXyETiyNzFD3MkTjDMf", + amount: 1_000_000, + }, + TestAccountInfo { + account_id: "foo3", + public_key: "ed25519:FXXrTXiKWpXj1R6r5fBvMLpstd8gPyrBq3qMByqKVzKF", + amount: 2_000_000, + }, + ], + records_in: &[ + TestStateRecord::Account { + account_id: "foo0", + amount: 1_000_000, + locked: 1_000_000, + storage_usage: 182, + }, + TestStateRecord::AccessKey { + account_id: "foo0", + public_key: "ed25519:He7QeRuwizNEhBioYG3u4DZ8jWXyETiyNzFD3MkTjDMf", + }, + TestStateRecord::Account { + account_id: "foo1", + amount: 1_000_000, + locked: 2_000_000, + storage_usage: 182, + }, + TestStateRecord::AccessKey { + account_id: "foo1", + public_key: "ed25519:FXXrTXiKWpXj1R6r5fBvMLpstd8gPyrBq3qMByqKVzKF", + }, + TestStateRecord::Account { + account_id: "asdf.near", + amount: 1_234_000, + locked: 0, + storage_usage: 182, + }, + TestStateRecord::AccessKey { + account_id: "asdf.near", + public_key: "ed25519:5C66RSJgwK17Yb6VtTbgBCFHDRPzGUd6AAhFdXNvmJuo", + }, + ], + extra_records: &[ + TestStateRecord::Account { + account_id: "foo0", + amount: 100_000_000, + locked: 0, + storage_usage: 0, + }, + TestStateRecord::Account { + account_id: "foo2", + amount: 300_000_000, + locked: 0, + storage_usage: 0, + }, + TestStateRecord::AccessKey { + account_id: "foo0", + public_key: "ed25519:FXXrTXiKWpXj1R6r5fBvMLpstd8gPyrBq3qMByqKVzKF", + }, + TestStateRecord::AccessKey { + account_id: "foo1", + public_key: "ed25519:He7QeRuwizNEhBioYG3u4DZ8jWXyETiyNzFD3MkTjDMf", + }, + TestStateRecord::Account { + account_id: 
"extra-account.near", + amount: 9_000_000, + locked: 0, + storage_usage: 0, + }, + TestStateRecord::AccessKey { + account_id: "extra-account.near", + public_key: "ed25519:BhnQV3oJa8iSQDKDc8gy36TsenaMFmv7qHvcnutuXj33", + }, + ], + wanted_records: &[ + TestStateRecord::Account { + account_id: "foo0", + amount: 100_000_000, + locked: 0, + storage_usage: 264, + }, + TestStateRecord::AccessKey { + account_id: "foo0", + public_key: "ed25519:He7QeRuwizNEhBioYG3u4DZ8jWXyETiyNzFD3MkTjDMf", + }, + TestStateRecord::AccessKey { + account_id: "foo0", + public_key: "ed25519:FXXrTXiKWpXj1R6r5fBvMLpstd8gPyrBq3qMByqKVzKF", + }, + TestStateRecord::Account { + account_id: "foo1", + amount: 3_000_000, + locked: 0, + storage_usage: 264, + }, + TestStateRecord::AccessKey { + account_id: "foo1", + public_key: "ed25519:FXXrTXiKWpXj1R6r5fBvMLpstd8gPyrBq3qMByqKVzKF", + }, + TestStateRecord::AccessKey { + account_id: "foo1", + public_key: "ed25519:He7QeRuwizNEhBioYG3u4DZ8jWXyETiyNzFD3MkTjDMf", + }, + TestStateRecord::Account { + account_id: "foo2", + amount: 299_000_000, + locked: 1_000_000, + storage_usage: 182, + }, + TestStateRecord::AccessKey { + account_id: "foo2", + public_key: "ed25519:He7QeRuwizNEhBioYG3u4DZ8jWXyETiyNzFD3MkTjDMf", + }, + TestStateRecord::Account { + account_id: "foo3", + amount: 10_000 * nearcore::config::NEAR_BASE, + locked: 2_000_000, + storage_usage: 182, + }, + TestStateRecord::AccessKey { + account_id: "foo3", + public_key: "ed25519:FXXrTXiKWpXj1R6r5fBvMLpstd8gPyrBq3qMByqKVzKF", + }, + TestStateRecord::Account { + account_id: "asdf.near", + amount: 1_234_000, + locked: 0, + storage_usage: 182, + }, + TestStateRecord::AccessKey { + account_id: "asdf.near", + public_key: "ed25519:5C66RSJgwK17Yb6VtTbgBCFHDRPzGUd6AAhFdXNvmJuo", + }, + TestStateRecord::Account { + account_id: "extra-account.near", + amount: 9_000_000, + locked: 0, + storage_usage: 182, + }, + TestStateRecord::AccessKey { + account_id: "extra-account.near", + public_key: 
"ed25519:BhnQV3oJa8iSQDKDc8gy36TsenaMFmv7qHvcnutuXj33", + }, + ], + }, + ]; + + #[test] + fn test_amend_genesis() { + for t in TEST_CASES.iter() { + t.run().unwrap(); + } + } +} From 2a2f602de1284034894b1200d8f21d3508e3c51c Mon Sep 17 00:00:00 2001 From: posvyatokum Date: Thu, 27 Oct 2022 15:33:07 +0100 Subject: [PATCH 042/103] store: adding transactions & receipts columns to cold storage (#7943) --- core/store/src/cold_storage.rs | 45 +++++++++++++++ core/store/src/columns.rs | 11 +++- .../src/tests/client/cold_storage.rs | 57 ++++++++++--------- 3 files changed, 86 insertions(+), 27 deletions(-) diff --git a/core/store/src/cold_storage.rs b/core/store/src/cold_storage.rs index b022cab7c58..328443a41bb 100644 --- a/core/store/src/cold_storage.rs +++ b/core/store/src/cold_storage.rs @@ -4,7 +4,10 @@ use crate::trie::TrieRefcountChange; use crate::{DBCol, DBTransaction, Database, Store, TrieChanges}; use borsh::BorshDeserialize; +use near_primitives::block::Block; +use near_primitives::hash::CryptoHash; use near_primitives::shard_layout::ShardLayout; +use near_primitives::sharding::ShardChunk; use near_primitives::types::BlockHeight; use std::collections::HashMap; use std::io; @@ -139,11 +142,23 @@ fn get_keys_from_store( let height_key = height.to_le_bytes(); let block_hash_key = store.get_or_err(DBCol::BlockHeight, &height_key)?.as_slice().to_vec(); + let block: Block = store.get_ser_or_err(DBCol::Block, &block_hash_key)?; + let chunks = block + .chunks() + .iter() + .map(|chunk_header| { + store.get_ser_or_err(DBCol::Chunks, chunk_header.chunk_hash().as_bytes()) + }) + .collect::>>()?; + for key_type in DBKeyType::iter() { key_type_to_keys.insert( key_type, match key_type { DBKeyType::BlockHash => vec![block_hash_key.clone()], + DBKeyType::ShardId => { + (0..shard_layout.num_shards()).map(|si| si.to_le_bytes().to_vec()).collect() + } DBKeyType::ShardUId => shard_layout .get_shard_uids() .iter() @@ -187,6 +202,36 @@ fn get_keys_from_store( )?; keys } + 
DBKeyType::TransactionHash => chunks + .iter() + .flat_map(|c| c.transactions().iter().map(|t| t.get_hash().as_bytes().to_vec())) + .collect(), + DBKeyType::ReceiptHash => chunks + .iter() + .flat_map(|c| c.receipts().iter().map(|r| r.get_hash().as_bytes().to_vec())) + .collect(), + DBKeyType::OutcomeId => { + debug_assert_eq!( + DBCol::OutcomeIds.key_type(), + &[DBKeyType::BlockHash, DBKeyType::ShardId] + ); + (0..shard_layout.num_shards()) + .map(|shard_id| { + store.get_ser( + DBCol::OutcomeIds, + &join_two_keys(&block_hash_key, &shard_id.to_le_bytes()), + ) + }) + .collect::>>>>()? + .into_iter() + .flat_map(|hashes| { + hashes + .unwrap_or_default() + .into_iter() + .map(|hash| hash.as_bytes().to_vec()) + }) + .collect() + } _ => { vec![] } diff --git a/core/store/src/columns.rs b/core/store/src/columns.rs index 7619fc057d8..cb4ff032a26 100644 --- a/core/store/src/columns.rs +++ b/core/store/src/columns.rs @@ -368,7 +368,16 @@ impl DBCol { /// Whether this column should be copied to the cold storage. 
pub const fn is_cold(&self) -> bool { match self { - DBCol::Block | DBCol::State | DBCol::StateChanges => true, + DBCol::Block + | DBCol::IncomingReceipts + | DBCol::OutcomeIds + | DBCol::OutgoingReceipts + | DBCol::ReceiptIdToShardId + | DBCol::Receipts + | DBCol::State + | DBCol::StateChanges + | DBCol::TransactionResultForBlock + | DBCol::Transactions => true, _ => false, } } diff --git a/integration-tests/src/tests/client/cold_storage.rs b/integration-tests/src/tests/client/cold_storage.rs index a42c2bbdde6..97d0ee9750f 100644 --- a/integration-tests/src/tests/client/cold_storage.rs +++ b/integration-tests/src/tests/client/cold_storage.rs @@ -77,32 +77,37 @@ fn test_storage_after_commit_of_cold_update() { ); env.clients[0].process_tx(tx, false, false); } - for i in 0..5 { - let tx = SignedTransaction::from_actions( - h * 10 + i, - "test0".parse().unwrap(), - "test0".parse().unwrap(), - &signer, - vec![Action::FunctionCall(FunctionCallAction { - method_name: "write_random_value".to_string(), - args: vec![], - gas: 100_000_000_000_000, - deposit: 0, - })], - last_hash, - ); - env.clients[0].process_tx(tx, false, false); - } - for i in 0..5 { - let tx = SignedTransaction::send_money( - h * 10 + i, - "test0".parse().unwrap(), - "test0".parse().unwrap(), - &signer, - 1, - last_hash, - ); - env.clients[0].process_tx(tx, false, false); + // Don't send transactions in last two blocks. Because on last block production a chunk from + // the next block will be produced and information about these transactions will be written + // into db. And it is a PAIN to filter it out, especially for Receipts. 
+ if h + 2 < max_height { + for i in 0..5 { + let tx = SignedTransaction::from_actions( + h * 10 + i, + "test0".parse().unwrap(), + "test0".parse().unwrap(), + &signer, + vec![Action::FunctionCall(FunctionCallAction { + method_name: "write_random_value".to_string(), + args: vec![], + gas: 100_000_000_000_000, + deposit: 0, + })], + last_hash, + ); + env.clients[0].process_tx(tx, false, false); + } + for i in 0..5 { + let tx = SignedTransaction::send_money( + h * 10 + i, + "test0".parse().unwrap(), + "test1".parse().unwrap(), + &signer, + 1, + last_hash, + ); + env.clients[0].process_tx(tx, false, false); + } } let block = env.clients[0].produce_block(h).unwrap().unwrap(); From 79c1fc18ed6466574c15d19de61e2a078cbc149f Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Thu, 27 Oct 2022 16:27:28 +0100 Subject: [PATCH 043/103] chain: flatten match pattern to reduce indentation depth (#7945) --- chain/client/src/view_client.rs | 92 +++++++++++++++------------------ 1 file changed, 43 insertions(+), 49 deletions(-) diff --git a/chain/client/src/view_client.rs b/chain/client/src/view_client.rs index a2a595fbe2b..699d602a957 100644 --- a/chain/client/src/view_client.rs +++ b/chain/client/src/view_client.rs @@ -873,59 +873,53 @@ impl Handler> for ViewClientActor { &outcome_proof.block_hash, target_shard_id, )?; - match res { - Some((h, target_shard_id)) => { - outcome_proof.block_hash = h; - // Here we assume the number of shards is small so this reconstruction - // should be fast - let outcome_roots = self - .chain - .get_block(&h)? 
- .chunks() - .iter() - .map(|header| header.outcome_root()) - .collect::>(); - if target_shard_id >= (outcome_roots.len() as u64) { - return Err(GetExecutionOutcomeError::InconsistentState { - number_or_shards: outcome_roots.len(), - execution_outcome_shard_id: target_shard_id, - }); - } - Ok(GetExecutionOutcomeResponse { - outcome_proof: outcome_proof.into(), - outcome_root_proof: merklize(&outcome_roots).1 - [target_shard_id as usize] - .clone(), - }) + if let Some((h, target_shard_id)) = res { + outcome_proof.block_hash = h; + // Here we assume the number of shards is small so this reconstruction + // should be fast + let outcome_roots = self + .chain + .get_block(&h)? + .chunks() + .iter() + .map(|header| header.outcome_root()) + .collect::>(); + if target_shard_id >= (outcome_roots.len() as u64) { + return Err(GetExecutionOutcomeError::InconsistentState { + number_or_shards: outcome_roots.len(), + execution_outcome_shard_id: target_shard_id, + }); } - None => Err(GetExecutionOutcomeError::NotConfirmed { - transaction_or_receipt_id: id, - }), + Ok(GetExecutionOutcomeResponse { + outcome_proof: outcome_proof.into(), + outcome_root_proof: merklize(&outcome_roots).1[target_shard_id as usize] + .clone(), + }) + } else { + Err(GetExecutionOutcomeError::NotConfirmed { transaction_or_receipt_id: id }) } } - Err(e) => match e { - near_chain::Error::DBNotFoundErr(_) => { - let head = self.chain.head()?; - let target_shard_id = - self.runtime_adapter.account_id_to_shard_id(&account_id, &head.epoch_id)?; - if self.runtime_adapter.cares_about_shard( - self.validator_account_id.as_ref(), - &head.last_block_hash, - target_shard_id, - true, - ) { - Err(GetExecutionOutcomeError::UnknownTransactionOrReceipt { - transaction_or_receipt_id: id, - }) - } else { - Err(GetExecutionOutcomeError::UnavailableShard { - transaction_or_receipt_id: id, - shard_id: target_shard_id, - }) - } + Err(near_chain::Error::DBNotFoundErr(_)) => { + let head = self.chain.head()?; + let 
target_shard_id = + self.runtime_adapter.account_id_to_shard_id(&account_id, &head.epoch_id)?; + if self.runtime_adapter.cares_about_shard( + self.validator_account_id.as_ref(), + &head.last_block_hash, + target_shard_id, + true, + ) { + Err(GetExecutionOutcomeError::UnknownTransactionOrReceipt { + transaction_or_receipt_id: id, + }) + } else { + Err(GetExecutionOutcomeError::UnavailableShard { + transaction_or_receipt_id: id, + shard_id: target_shard_id, + }) } - _ => Err(e.into()), - }, + } + Err(err) => Err(err.into()), } } } From 2629a4d55d761281c5bcef49fbd3d9301e99ac17 Mon Sep 17 00:00:00 2001 From: posvyatokum Date: Thu, 27 Oct 2022 17:42:48 +0100 Subject: [PATCH 044/103] store: adding final cold columns (#7950) --- core/store/src/cold_storage.rs | 6 +++ core/store/src/columns.rs | 7 +++ .../src/tests/client/cold_storage.rs | 47 +++++++++++++++++-- 3 files changed, 55 insertions(+), 5 deletions(-) diff --git a/core/store/src/cold_storage.rs b/core/store/src/cold_storage.rs index 328443a41bb..2526c9c015d 100644 --- a/core/store/src/cold_storage.rs +++ b/core/store/src/cold_storage.rs @@ -156,6 +156,9 @@ fn get_keys_from_store( key_type, match key_type { DBKeyType::BlockHash => vec![block_hash_key.clone()], + DBKeyType::PreviousBlockHash => { + vec![block.header().prev_hash().as_bytes().to_vec()] + } DBKeyType::ShardId => { (0..shard_layout.num_shards()).map(|si| si.to_le_bytes().to_vec()).collect() } @@ -210,6 +213,9 @@ fn get_keys_from_store( .iter() .flat_map(|c| c.receipts().iter().map(|r| r.get_hash().as_bytes().to_vec())) .collect(), + DBKeyType::ChunkHash => { + chunks.iter().map(|c| c.chunk_hash().as_bytes().to_vec()).collect() + } DBKeyType::OutcomeId => { debug_assert_eq!( DBCol::OutcomeIds.key_type(), diff --git a/core/store/src/columns.rs b/core/store/src/columns.rs index cb4ff032a26..36e8aa90868 100644 --- a/core/store/src/columns.rs +++ b/core/store/src/columns.rs @@ -369,13 +369,20 @@ impl DBCol { pub const fn is_cold(&self) -> bool { match self 
{ DBCol::Block + | DBCol::BlockExtra + | DBCol::BlockInfo + | DBCol::ChunkExtra + | DBCol::Chunks | DBCol::IncomingReceipts + | DBCol::NextBlockHashes | DBCol::OutcomeIds | DBCol::OutgoingReceipts | DBCol::ReceiptIdToShardId | DBCol::Receipts | DBCol::State | DBCol::StateChanges + | DBCol::StateChangesForSplitStates + | DBCol::StateHeaders | DBCol::TransactionResultForBlock | DBCol::Transactions => true, _ => false, diff --git a/integration-tests/src/tests/client/cold_storage.rs b/integration-tests/src/tests/client/cold_storage.rs index 97d0ee9750f..75b561dd468 100644 --- a/integration-tests/src/tests/client/cold_storage.rs +++ b/integration-tests/src/tests/client/cold_storage.rs @@ -1,9 +1,11 @@ use crate::tests::client::process_blocks::create_nightshade_runtimes; +use borsh::BorshDeserialize; use near_chain::{ChainGenesis, Provenance}; use near_chain_configs::Genesis; use near_client::test_utils::TestEnv; use near_crypto::{InMemorySigner, KeyType}; use near_o11y::testonly::init_test_logger; +use near_primitives::sharding::ShardChunk; use near_primitives::transaction::{ Action, DeployContractAction, FunctionCallAction, SignedTransaction, }; @@ -22,11 +24,24 @@ fn check_key(first_store: &Store, second_store: &Store, col: DBCol, key: &[u8]) assert_eq!(first_res.unwrap(), second_res.unwrap()); } -fn check_iter(first_store: &Store, second_store: &Store, col: DBCol) -> u64 { +fn check_iter( + first_store: &Store, + second_store: &Store, + col: DBCol, + no_check_rules: &Vec) -> bool>>, +) -> u64 { let mut num_checks = 0; - for (key, _) in first_store.iter(col).map(Result::unwrap) { - check_key(first_store, second_store, col, &key); - num_checks += 1; + for (key, value) in first_store.iter(col).map(Result::unwrap) { + let mut check = true; + for no_check in no_check_rules { + if no_check(col, &value) { + check = false; + } + } + if check { + check_key(first_store, second_store, col, &key); + num_checks += 1; + } } num_checks } @@ -139,10 +154,32 @@ fn 
test_storage_after_commit_of_cold_update() { let cold_store = NodeStorage::new(cold_db).get_store(Temperature::Hot); + // We still need to filter out one chunk + let mut no_check_rules: Vec) -> bool>> = vec![]; + no_check_rules.push(Box::new(move |col, value| -> bool { + if col == DBCol::Chunks { + let chunk = ShardChunk::try_from_slice(&*value).unwrap(); + if *chunk.prev_block() == last_hash { + return true; + } + } + false + })); + for col in DBCol::iter() { if col.is_cold() { + let num_checks = check_iter( + &env.clients[0].runtime_adapter.store(), + &cold_store, + col, + &no_check_rules, + ); // assert that this test actually checks something - assert!(check_iter(&env.clients[0].runtime_adapter.store(), &cold_store, col) > 0); + assert!( + col == DBCol::StateChangesForSplitStates + || col == DBCol::StateHeaders + || num_checks > 0 + ); } } } From b5eba1637bf421c46405455955870cce50c11a14 Mon Sep 17 00:00:00 2001 From: Alex Kladov Date: Thu, 27 Oct 2022 18:09:20 +0100 Subject: [PATCH 045/103] docs: move docs about receipts from confluence (#7947) * docs: move docs about receipts from confluence * Apply suggestions from code review Co-authored-by: Akhilesh Singhania Co-authored-by: Akhilesh Singhania --- docs/SUMMARY.md | 1 + docs/architecture/how/tx_receipts.md | 144 +++++++++++++++++++++++++++ 2 files changed, 145 insertions(+) create mode 100644 docs/architecture/how/tx_receipts.md diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index f2b4d698905..ed319b37c81 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -10,6 +10,7 @@ - [Garbage Collection](./architecture/how/gc.md) - [How Epoch Works](./architecture/how/epoch.md) - [Transaction Routing](./architecture/how/tx_routing.md) + - [Transactions And Receipts](./architecture/how/tx_receipts.md) - [Trie](./architecture/trie.md) - [Network](./architecture/network.md) - [Gas Cost Parameters](./architecture/gas/README.md) diff --git a/docs/architecture/how/tx_receipts.md b/docs/architecture/how/tx_receipts.md new 
file mode 100644 index 00000000000..628a1a6347a --- /dev/null +++ b/docs/architecture/how/tx_receipts.md @@ -0,0 +1,144 @@ +# Transaction, Receipts and Chunk Surprises + +We finished the previous article ([Transaction routing](./tx_routing.md)) on the +part, where transaction was successfully added to the soon-to-be block +producer’s mempool. + +In this article, we’ll cover what happens next: how it is changed into a receipt +and executed, potentially creating even more receipts in the process. + +First, let’s look at the ‘high level view’: + +![image](https://user-images.githubusercontent.com/1711539/198282472-3883dcc1-77ca-452c-b21e-0a7af1435ede.png) + +## Transaction vs receipt + +As you can see from the image above: + +**Transactions** are ‘external’ communication - they are coming from the +outside. + +**Receipts** are used for ‘internal’ communication (cross shard, cross +contract) - they are created by the block/chunk producers. + + +## Life of a Transaction + +If we ‘zoom-in', the chunk producer's work looks like this: + +![image](https://user-images.githubusercontent.com/1711539/198282518-cdeb375e-8f1c-4634-842c-6490020ad9c0.png) + + +### Step 1: Process Transaction into receipt + +Once a chunk producer is ready to produce a chunk, it will fetch the +transactions from its mempool, check that they are valid, and if so, prepare to +process them into receipts. + +**Note:** There are additional restrictions (e.g. making sure that we take them in +the right order, that we don’t take too many, etc.) - that you can see in +nomicon’s [transaction page](https://nomicon.io/ChainSpec/Transactions). 
+ +You can see this part in explorer: + +![image](https://user-images.githubusercontent.com/1711539/198282561-c97235a1-93a1-4dc8-b6bc-ee9983376b2c.png) + +### Step 2: Sending receipt to the proper destination + +Once we have a receipt, we have to send it to the proper destination - by adding +it to the ‘outgoing_receipt’ list, which will be forwarded to the chunk +producers from the next block. + +**Note:** There is a special case here - if the sender of the receipt is the +same as the receiver, then the receipt will be added to the ‘local_receipts' +queue and executed in the same block. + +### Step 3: When incoming receipt arrives + +(Note: this happens in the ‘next’ block) + +When chunk producer receives the incoming receipt, it will try to execute its +actions (creating accounts, executing function calls etc). + +Such actions might generate additional receipts (for example a contract might +want to call other contracts). All these outputs are added to the outgoing +receipt queue to be executed in the next block. + +If the incoming receipt queue is too large to execute in the current chunk, +the producer will put the remaining receipts onto the ‘delayed’ queue. + +### Step 4: Profit + +When all the ‘dependent’ receipts are executed for a given transaction, we can +consider the transaction to be successful. + +### [Advanced] But reality is more complex + +**Caution:** In the section below, some things are simplified and do not match exactly +to how the current code works. + +Let’s quickly also check what’s actually inside a Chunk: + +```rust +pub struct ShardChunkV2 { + pub chunk_hash: ChunkHash, + pub header: ShardChunkHeader, + pub transactions: Vec<SignedTransaction>, + pub receipts: Vec<Receipt>, // outgoing receipts from 'previous' block +} +``` + +Yes, it is a little bit confusing, that receipts here are NOT the ‘incoming’ +ones for this chunk, but instead the ‘outgoing’ ones from the previous block. Why?!?! + +This has to do with performance. 
+ +#### Simple approach + +First, let’s imagine what the system would look like, if chunk contained things +that we’d expect: + +* list of transactions +* list of incoming receipts +* list of outgoing receipts +* hash of the final state + +This means, that the chunk producer has to compute all this information first, +before sending the chunk to other validators. + +![image](https://user-images.githubusercontent.com/1711539/198282601-383977f1-08dd-45fe-aa19-70556d585034.png) + + +Once the other validators receive the chunk, they can start their own processing to +verify those outgoing receipts/final state - and then do the signing. Only then, +can the next chunk producer start creating the next chunk. + +While this approach does work, we can do it faster. + +#### Faster approach + +What if the chunk didn’t contain the ‘output’ state? This changes our ‘mental’ model +a little bit, as now when we’re signing the chunk, we’d actually be +verifying the previous chunk - but that’s the topic for the next article (TODO: +add future link to article about signatures and verification). + +For now, imagine if the chunk only had: + +* list of transactions +* list of incoming receipts + +In such a case, the chunk producer could send the chunk a lot earlier, and +validators (and chunk producer) could do their processing at the same time: + + +![image](https://user-images.githubusercontent.com/1711539/198282641-1e728088-6f2b-4cb9-90c9-5eb09304e72a.png) + + +Now the last mystery is: why do we have ‘outgoing’ receipts from previous chunk +rather than incoming to the current one? + +This is yet another optimization: this way the chunk producer can send out the +chunk a little bit earlier - without having to wait for all the other shards. 
+ +But that’s the topic for another article (TODO: add future link to article about +chunk fragments etc) From 31e59094494b1d8409369350b74a030a5ac2f56a Mon Sep 17 00:00:00 2001 From: Alex Kladov Date: Thu, 27 Oct 2022 18:46:13 +0100 Subject: [PATCH 046/103] chore: introduce `--profile prod` (#7923) This solves two problems: * Makes it easy to produce a fully-optimized binary when doing lto-sensitive benchmarking * Makes it simpler for the estimator to build the binary with the right optimizations, by avoiding hard-coding config into the estimator. Ideally, we'd change the Makefile as well, but we'd rather not break existing `./target/release` layout. In other words, before we had two sources of truth: Makefile and estimator. Now they are Makefile and Cargo.toml, and we also gained a nice cli flag for building fully optimized version: $ cargo b --profile prod -p neard --bin neard cc https://github.com/near/nearcore/issues/6226 This PR is prompted by the recent confusion about lto: https://near.zulipchat.com/#narrow/stream/345766-pagoda.2Fstorage.2Fflat-storage/topic/Migration.20plan/near/305801512 I don't think this solves the problem, but hopefully existence of `--profile prod` would make it more obvious? Not super sure though, this is more like an RFC than "yes, I think we should have this" --- Cargo.toml | 13 +++++++++++-- Makefile | 2 -- docs/practices/fast_builds.md | 20 +++++++++++--------- runtime/runtime-params-estimator/src/main.rs | 13 ++++++------- 4 files changed, 28 insertions(+), 20 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 12983b5c60a..ac5592fca3a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -207,12 +207,21 @@ xz2 = "0.1.6" # "test" profile inherits from "dev" profile. # https://doc.rust-lang.org/cargo/reference/profiles.html#test +[profile.dev] +panic = 'abort' + [profile.release] overflow-checks = true panic = 'abort' +lto = "fat" +codegen-units = 1 + +# A much faster to compile version of `release`. 
+[profile.quick-release] +inherits = "release" +lto = false +codegen-units = 16 -[profile.dev] -panic = 'abort' # Compile some dependencies with optimizations to speed up tests. [profile.dev.package.hex] diff --git a/Makefile b/Makefile index fd3c144c40c..90e9d92188c 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,3 @@ -export CARGO_PROFILE_RELEASE_CODEGEN_UNITS = 1 -export CARGO_PROFILE_RELEASE_LTO = fat export DOCKER_BUILDKIT = 1 export CARGO_BUILD_RUSTFLAGS = -D warnings export NEAR_RELEASE_BUILD = no diff --git a/docs/practices/fast_builds.md b/docs/practices/fast_builds.md index d7e750f381a..bbc100dea6b 100644 --- a/docs/practices/fast_builds.md +++ b/docs/practices/fast_builds.md @@ -13,15 +13,17 @@ contains a section on compilation time as well! ## Release Builds and Link Time Optimization -Obviously, `cargo build --release` is slower than `cargo build`. What's not -entirely obvious is that `cargo build -r` is not as slow as it could be: our -`--release` profile is somewhat optimized for fast builds, as it doesn't enable -full LTO. - -When building production binaries, we use `lto=true` and `codegen-units=1` -options, which make the build significantly slower (but the resulting binary -somewhat faster). Keep this in mind when running benchmarks or parameter -estimation. +Obviously, `cargo build --release` is slower than `cargo build`. We enable full +lto (link time optimization), so our `-r` builds are very slow, use a lot of +RAM, and don't take full advantage of parallelism. + +As debug builds are much too slow at runtime for many purposes, we have a custom +profile `--profile quick-release` which is equivalent to `-r`, but doesn't do +`lto`. + +Use `--profile quick-release` when doing comparative benchmarking, or when +connecting a locally built node to a network. Use `-r` if you want to get +absolute performance numbers. 
## Linker diff --git a/runtime/runtime-params-estimator/src/main.rs b/runtime/runtime-params-estimator/src/main.rs index 55f6d50b884..7cdec95332b 100644 --- a/runtime/runtime-params-estimator/src/main.rs +++ b/runtime/runtime-params-estimator/src/main.rs @@ -306,6 +306,7 @@ fn main_docker( json_output: bool, debug: bool, ) -> anyhow::Result<()> { + let profile = if full { "release" } else { "quick-release" }; exec("docker --version").context("please install `docker`")?; let project_root = project_root(); @@ -340,15 +341,17 @@ fn main_docker( #[cfg(feature = "nightly_protocol")] buf.push_str(",nightly_protocol"); - buf.push_str(" --release;"); + buf.push_str(" --profile "); + buf.push_str(profile); + buf.push_str(";"); let mut qemu_cmd_builder = QemuCommandBuilder::default(); if debug { qemu_cmd_builder = qemu_cmd_builder.plugin_log(true).print_on_every_close(true); } - let mut qemu_cmd = - qemu_cmd_builder.build("/host/nearcore/target/release/runtime-params-estimator")?; + let mut qemu_cmd = qemu_cmd_builder + .build(&format!("/host/nearcore/target/{profile}/runtime-params-estimator"))?; qemu_cmd.args(&["--home", "/.near"]); buf.push_str(&format!("{:?}", qemu_cmd)); @@ -404,10 +407,6 @@ fn main_docker( if debug_shell || !json_output { cmd.args(&["--interactive", "--tty"]); } - if full { - cmd.args(&["--env", "CARGO_PROFILE_RELEASE_LTO=fat"]) - .args(&["--env", "CARGO_PROFILE_RELEASE_CODEGEN_UNITS=1"]); - } cmd.arg(tagged_image); if debug_shell { From 9830540ee55d87250d11dd1e40d9c7cc2b382079 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Fri, 28 Oct 2022 12:15:39 +0100 Subject: [PATCH 047/103] crypto: move base58 formatting into single struct reducing duplication (#7953) Have a single place where binary buffers are formatted as base58, namely a new helper Bs58 struct, and use it to reduce code duplication. It also avoids allocation as it can format the buffer onto stack without creating a new string. 
With some more cleanups, remove bunch of other unnecessary vector and string allocations. --- core/crypto/src/signature.rs | 73 ++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 33 deletions(-) diff --git a/core/crypto/src/signature.rs b/core/crypto/src/signature.rs index 9b123085330..c7baebf0f74 100644 --- a/core/crypto/src/signature.rs +++ b/core/crypto/src/signature.rs @@ -24,14 +24,10 @@ pub enum KeyType { impl Display for KeyType { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { - write!( - f, - "{}", - match self { - KeyType::ED25519 => "ed25519", - KeyType::SECP256K1 => "secp256k1", - }, - ) + f.write_str(match self { + KeyType::ED25519 => "ed25519", + KeyType::SECP256K1 => "secp256k1", + }) } } @@ -111,7 +107,7 @@ impl AsRef<[u8]> for Secp256K1PublicKey { impl std::fmt::Debug for Secp256K1PublicKey { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { - write!(f, "{}", bs58::encode(&self.0.to_vec()).into_string()) + Display::fmt(&Bs58(&self.0), f) } } @@ -164,7 +160,7 @@ impl TryFrom<&[u8]> for ED25519PublicKey { impl std::fmt::Debug for ED25519PublicKey { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { - write!(f, "{}", bs58::encode(&self.0.to_vec()).into_string()) + Display::fmt(&Bs58(&self.0), f) } } @@ -260,7 +256,7 @@ impl Display for PublicKey { PublicKey::ED25519(public_key) => (KeyType::ED25519, &public_key.0[..]), PublicKey::SECP256K1(public_key) => (KeyType::SECP256K1, &public_key.0[..]), }; - write!(fmt, "{}:{}", key_type, bs58::encode(key_data).into_string()) + write!(fmt, "{}:{}", key_type, Bs58(key_data)) } } @@ -386,11 +382,7 @@ impl PartialEq for ED25519SecretKey { impl std::fmt::Debug for ED25519SecretKey { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { - write!( - f, - "{}", - bs58::encode(&self.0[..ed25519_dalek::SECRET_KEY_LENGTH].to_vec()).into_string() - ) + 
Display::fmt(&Bs58(&self.0[..ed25519_dalek::SECRET_KEY_LENGTH]), f) } } @@ -469,11 +461,11 @@ impl SecretKey { impl std::fmt::Display for SecretKey { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { - let data = match self { - SecretKey::ED25519(secret_key) => bs58::encode(&secret_key.0[..]).into_string(), - SecretKey::SECP256K1(secret_key) => bs58::encode(&secret_key[..]).into_string(), + let (key_type, key_data) = match self { + SecretKey::ED25519(secret_key) => (KeyType::ED25519, &secret_key.0[..]), + SecretKey::SECP256K1(secret_key) => (KeyType::SECP256K1, &secret_key[..]), }; - write!(f, "{}:{}", self.key_type(), data) + write!(f, "{}:{}", key_type, Bs58(key_data)) } } @@ -524,11 +516,7 @@ impl serde::Serialize for SecretKey { where S: serde::Serializer, { - let data = match self { - SecretKey::ED25519(secret_key) => bs58::encode(&secret_key.0[..]).into_string(), - SecretKey::SECP256K1(secret_key) => bs58::encode(&secret_key[..]).into_string(), - }; - serializer.serialize_str(&format!("{}:{}", self.key_type(), data)) + serializer.collect_str(self) } } @@ -639,7 +627,7 @@ impl PartialEq for Secp256K1Signature { impl Debug for Secp256K1Signature { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { - write!(f, "{}", bs58::encode(&self.0.to_vec()).into_string()) + Display::fmt(&Bs58(&self.0), f) } } @@ -777,19 +765,17 @@ impl BorshDeserialize for Signature { impl Display for Signature { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { - let data = match self { - Signature::ED25519(signature) => { - bs58::encode(&signature.to_bytes().to_vec()).into_string() - } - Signature::SECP256K1(signature) => bs58::encode(&signature.0[..]).into_string(), + let (key_type, key_data) = match self { + Signature::ED25519(signature) => (KeyType::ED25519, signature.as_ref()), + Signature::SECP256K1(signature) => (KeyType::SECP256K1, &signature.0[..]), }; - write!(f, "{}", format!("{}:{}", 
self.key_type(), data)) + write!(f, "{}:{}", key_type, Bs58(key_data)) } } impl Debug for Signature { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { - write!(f, "{}", self) + Display::fmt(self, f) } } @@ -856,6 +842,27 @@ impl<'de> serde::Deserialize<'de> for Signature { } } +/// Helper struct which provides Display implementation for bytes slice +/// encoding them using base58. +// TODO(mina86): Get rid of it once bs58 has this feature. There’s currently PR +// for that. +struct Bs58<'a>(&'a [u8]); + +impl<'a> core::fmt::Display for Bs58<'a> { + fn fmt(&self, fmt: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + debug_assert!(self.0.len() <= 65); + // The largest buffer we’re ever encoding is 65-byte long. Base58 + // increases size of the value by less than 40%. 96-byte buffer is + // therefore enough to fit the largest value we’re ever encoding. + let mut buf = [0u8; 96]; + let len = bs58::encode(self.0).into(&mut buf[..]).unwrap(); + let output = &buf[..len]; + // SAFETY: we know that alphabet can only include ASCII characters + // thus our result is an ASCII string. 
+ fmt.write_str(unsafe { std::str::from_utf8_unchecked(output) }) + } +} + #[cfg(test)] mod tests { use super::*; From 552a629bd65ecc75f60956566a0758776dfbf03f Mon Sep 17 00:00:00 2001 From: Blas Rodriguez Irizar Date: Fri, 28 Oct 2022 14:00:37 +0200 Subject: [PATCH 048/103] logic: improve test coverage and error messages of ed25519_verify (#7804) ref: #7567 --- runtime/near-vm-logic/src/logic.rs | 35 ++- .../near-vm-logic/src/tests/ed25519_verify.rs | 288 ++++++++++++++++++ runtime/near-vm-logic/src/tests/miscs.rs | 85 ------ runtime/near-vm-logic/src/tests/mod.rs | 2 + 4 files changed, 316 insertions(+), 94 deletions(-) create mode 100644 runtime/near-vm-logic/src/tests/ed25519_verify.rs diff --git a/runtime/near-vm-logic/src/logic.rs b/runtime/near-vm-logic/src/logic.rs index 2a1c44f96ce..53bce2b9b26 100644 --- a/runtime/near-vm-logic/src/logic.rs +++ b/runtime/near-vm-logic/src/logic.rs @@ -1137,6 +1137,12 @@ impl<'a> VMLogic<'a> { /// /// `input_cost(num_bytes_signature) + input_cost(num_bytes_message) + input_cost(num_bytes_public_key) + /// ed25519_verify_base + ed25519_verify_byte * num_bytes_message` + /// + /// # Error + /// + /// If the public key's size is not equal to 32 returns [HostError::Ed25519VerifyInvalidInput]. + /// If the signature size is not equal to 64 returns [HostError::Ed25519VerifyInvalidInput]. 
+ #[cfg(feature = "protocol_feature_ed25519_verify")] pub fn ed25519_verify( &mut self, @@ -1147,26 +1153,37 @@ impl<'a> VMLogic<'a> { pub_key_len: u64, pub_key_ptr: u64, ) -> Result { - use ed25519_dalek::{PublicKey, Signature, Verifier, SIGNATURE_LENGTH}; + use ed25519_dalek::{PublicKey, Signature, Verifier, PUBLIC_KEY_LENGTH, SIGNATURE_LENGTH}; self.gas_counter.pay_base(ed25519_verify_base)?; - if sig_len != SIGNATURE_LENGTH as u64 { + + let signature_array = self.get_vec_from_memory_or_register(sig_ptr, sig_len)?; + if signature_array.len() != SIGNATURE_LENGTH { return Err(VMLogicError::HostError(HostError::Ed25519VerifyInvalidInput { msg: "invalid signature length".to_string(), })); } + + let signature = match Signature::from_bytes(&signature_array) { + Ok(signature) => signature, + Err(_) => return Ok(0), + }; + let msg = self.get_vec_from_memory_or_register(msg_ptr, msg_len)?; - let signature_array = self.get_vec_from_memory_or_register(sig_ptr, sig_len)?; - let signature = Signature::from_bytes(&signature_array).map_err(|e| { - VMLogicError::HostError(HostError::Ed25519VerifyInvalidInput { msg: e.to_string() }) - })?; let num_bytes = msg.len(); self.gas_counter.pay_per(ed25519_verify_byte, num_bytes as _)?; let pub_key_array = self.get_vec_from_memory_or_register(pub_key_ptr, pub_key_len)?; - let pub_key = PublicKey::from_bytes(&pub_key_array).map_err(|e| { - VMLogicError::HostError(HostError::Ed25519VerifyInvalidInput { msg: e.to_string() }) - })?; + if pub_key_array.len() != PUBLIC_KEY_LENGTH { + return Err(VMLogicError::HostError(HostError::Ed25519VerifyInvalidInput { + msg: "invalid public key length".to_string(), + })); + } + let pub_key = match PublicKey::from_bytes(&pub_key_array) { + Ok(pub_key) => pub_key, + Err(_) => return Ok(0), + }; + match pub_key.verify(&msg, &signature) { Err(_) => Ok(0), Ok(()) => Ok(1), diff --git a/runtime/near-vm-logic/src/tests/ed25519_verify.rs b/runtime/near-vm-logic/src/tests/ed25519_verify.rs new file mode 100644 
index 00000000000..edc04d02fd3 --- /dev/null +++ b/runtime/near-vm-logic/src/tests/ed25519_verify.rs @@ -0,0 +1,288 @@ +use crate::tests::fixtures::get_context; +use crate::tests::helpers::*; +use crate::tests::vm_logic_builder::VMLogicBuilder; +use crate::VMLogic; +use crate::{map, ExtCosts}; +use near_vm_errors::HostError; +use near_vm_errors::VMLogicError; + +use std::collections::HashMap; + +fn create_signature() -> [u8; 64] { + [ + 145, 193, 203, 18, 114, 227, 14, 117, 33, 213, 121, 66, 130, 14, 25, 4, 36, 120, 46, 142, + 226, 215, 7, 66, 122, 112, 97, 30, 249, 135, 61, 165, 221, 249, 252, 23, 105, 40, 56, 70, + 31, 152, 236, 141, 154, 122, 207, 20, 75, 118, 79, 90, 168, 6, 221, 122, 213, 29, 126, 196, + 216, 104, 191, 6, + ] +} + +fn create_public_key() -> [u8; 32] { + [ + 32, 122, 6, 120, 146, 130, 30, 37, 215, 112, 241, 251, 160, 196, 124, 17, 255, 75, 129, 62, + 84, 22, 46, 206, 158, 184, 57, 224, 118, 35, 26, 182, + ] +} + +#[track_caller] +fn check_ed25519_verify( + logic: &mut VMLogic, + signature_len: usize, + signature: &[u8], + message_len: usize, + message: &[u8], + public_key_len: usize, + public_key: &[u8], + want: Result, + want_costs: HashMap, +) { + let result = logic.ed25519_verify( + signature_len as _, + signature.as_ptr() as _, + message_len as _, + message.as_ptr() as _, + public_key_len as _, + public_key.as_ptr() as _, + ); + + assert_eq!(want, result); + assert_costs(want_costs); +} + +#[test] +fn test_ed25519_verify_behavior_and_errors() { + let mut logic_builder = VMLogicBuilder::default(); + let mut logic = logic_builder.build(get_context(vec![], false)); + + let signature = create_signature(); + let bad_signature: [u8; 64] = [1; 64]; + + let mut forged_signature = signature.clone(); + // create a forged signature with the `s` scalar not properly reduced + // https://docs.rs/ed25519/latest/src/ed25519/lib.rs.html#302 + forged_signature[63] = 0b1110_0001; + + let public_key = create_public_key(); + + let mut forged_public_key = 
public_key.clone(); + // create a forged public key to force a PointDecompressionError + // https://docs.rs/ed25519-dalek/latest/src/ed25519_dalek/public.rs.html#142 + forged_public_key[31] = 0b1110_0001; + + // 32 bytes message + let message: [u8; 32] = [ + 107, 97, 106, 100, 108, 102, 107, 106, 97, 108, 107, 102, 106, 97, 107, 108, 102, 106, 100, + 107, 108, 97, 100, 106, 102, 107, 108, 106, 97, 100, 115, 107, + ]; + + let scenarios = [ + ( + signature.len(), + signature.clone(), + message.len(), + message.as_slice(), + public_key.len(), + public_key.clone(), + Ok(1), + map! { + ExtCosts::read_memory_byte: 128, + ExtCosts::read_memory_base: 3, + ExtCosts::ed25519_verify_base: 1, + ExtCosts::ed25519_verify_byte: 32, + }, + ), + ( + signature.len(), + signature.clone(), + message.len(), + message.as_slice(), + public_key.len(), + forged_public_key.clone(), + Ok(0), + map! { + ExtCosts::read_memory_byte: 128, + ExtCosts::read_memory_base: 3, + ExtCosts::ed25519_verify_base: 1, + ExtCosts::ed25519_verify_byte: 32, + }, + ), + ( + signature.len(), + signature.clone(), + message.len(), + message.as_slice(), + public_key.len() - 1, + public_key.clone(), + Err(VMLogicError::HostError(HostError::Ed25519VerifyInvalidInput { + msg: "invalid public key length".to_string(), + })), + map! { + ExtCosts::read_memory_byte: 127, + ExtCosts::read_memory_base: 3, + ExtCosts::ed25519_verify_base: 1, + ExtCosts::ed25519_verify_byte: 32, + }, + ), + ( + bad_signature.len(), + bad_signature.clone(), + message.len(), + message.as_slice(), + public_key.len(), + public_key.clone(), + Ok(0), + map! 
{ + ExtCosts::read_memory_byte: 128, + ExtCosts::read_memory_base: 3, + ExtCosts::ed25519_verify_base: 1, + ExtCosts::ed25519_verify_byte: 32, + }, + ), + ( + signature.len() - 1, + signature.clone(), + message.len(), + message.as_slice(), + public_key.len(), + public_key.clone(), + Err(VMLogicError::HostError(HostError::Ed25519VerifyInvalidInput { + msg: "invalid signature length".to_string(), + })), + map! { + ExtCosts::read_memory_base: 1, + ExtCosts::read_memory_byte: 63, + ExtCosts::ed25519_verify_base: 1, + }, + ), + ( + forged_signature.len(), + forged_signature.clone(), + message.len(), + message.as_slice(), + public_key.len(), + public_key.clone(), + Ok(0), + map! { + ExtCosts::read_memory_base: 1, + ExtCosts::read_memory_byte: 64, + ExtCosts::ed25519_verify_base: 1, + }, + ), + ( + forged_signature.len(), + forged_signature.clone(), + 0, + message.as_slice(), + public_key.len(), + public_key.clone(), + Ok(0), + map! { + ExtCosts::read_memory_base: 1, + ExtCosts::read_memory_byte: 64, + ExtCosts::ed25519_verify_base: 1, + }, + ), + ]; + + for ( + signature_len, + signature, + message_len, + message, + public_key_len, + public_key, + expected_result, + want_costs, + ) in scenarios + { + check_ed25519_verify( + &mut logic, + signature_len as _, + signature.as_ref(), + message_len as _, + message.as_ref(), + public_key_len as _, + public_key.as_ref(), + expected_result, + want_costs, + ); + } +} + +#[test] +fn test_ed25519_verify_check_registers() { + let mut logic_builder = VMLogicBuilder::default(); + let mut logic = logic_builder.build(get_context(vec![], false)); + + let signature = create_signature(); + let public_key = create_public_key(); + + let bad_signature: [u8; 64] = [1; 64]; + + // 32 bytes message + let message: [u8; 32] = [ + 107, 97, 106, 100, 108, 102, 107, 106, 97, 108, 107, 102, 106, 97, 107, 108, 102, 106, 100, + 107, 108, 97, 100, 106, 102, 107, 108, 106, 97, 100, 115, 107, + ]; + + let mut forged_signature = signature.clone(); + // 
create a forged signature with the `s` scalar not properly reduced + // https://docs.rs/ed25519/latest/src/ed25519/lib.rs.html#302 + forged_signature[63] = 0b1110_0001; + + // tests for data being read from registers + logic.wrapped_internal_write_register(1, &signature).unwrap(); + let result = logic.ed25519_verify( + u64::MAX, + 1 as _, + message.len() as _, + message.as_ptr() as _, + public_key.len() as _, + public_key.as_ptr() as _, + ); + assert_eq!(Ok(1u64), result); + + logic.wrapped_internal_write_register(1, &bad_signature).unwrap(); + let result = logic.ed25519_verify( + u64::MAX, + 1 as _, + message.len() as _, + message.as_ptr() as _, + public_key.len() as _, + public_key.as_ptr() as _, + ); + assert_eq!(Ok(0), result); + + logic.wrapped_internal_write_register(1, &forged_signature).unwrap(); + let result = logic.ed25519_verify( + u64::MAX, + 1 as _, + message.len() as _, + message.as_ptr() as _, + public_key.len() as _, + public_key.as_ptr() as _, + ); + assert_eq!(Ok(0), result); + + logic.wrapped_internal_write_register(1, &message).unwrap(); + let result = logic.ed25519_verify( + signature.len() as _, + signature.as_ptr() as _, + u64::MAX, + 1, + public_key.len() as _, + public_key.as_ptr() as _, + ); + assert_eq!(Ok(1), result); + + logic.wrapped_internal_write_register(1, &public_key).unwrap(); + let result = logic.ed25519_verify( + signature.len() as _, + signature.as_ptr() as _, + message.len() as _, + message.as_ptr() as _, + u64::MAX, + 1, + ); + assert_eq!(Ok(1), result); +} diff --git a/runtime/near-vm-logic/src/tests/miscs.rs b/runtime/near-vm-logic/src/tests/miscs.rs index 329d135a505..de128c0882a 100644 --- a/runtime/near-vm-logic/src/tests/miscs.rs +++ b/runtime/near-vm-logic/src/tests/miscs.rs @@ -858,88 +858,3 @@ fn test_contract_size_limit() { .into()) ); } - -#[cfg(feature = "protocol_feature_ed25519_verify")] -#[test] -fn test_ed25519_verify() { - use near_vm_errors::VMLogicError; - - let mut logic_builder = 
VMLogicBuilder::default(); - let mut logic = logic_builder.build(get_context(vec![], false)); - - let signature: [u8; 64] = [ - 145, 193, 203, 18, 114, 227, 14, 117, 33, 213, 121, 66, 130, 14, 25, 4, 36, 120, 46, 142, - 226, 215, 7, 66, 122, 112, 97, 30, 249, 135, 61, 165, 221, 249, 252, 23, 105, 40, 56, 70, - 31, 152, 236, 141, 154, 122, 207, 20, 75, 118, 79, 90, 168, 6, 221, 122, 213, 29, 126, 196, - 216, 104, 191, 6, - ]; - - let bad_signature: [u8; 64] = [1; 64]; - - let public_key: [u8; 32] = [ - 32, 122, 6, 120, 146, 130, 30, 37, 215, 112, 241, 251, 160, 196, 124, 17, 255, 75, 129, 62, - 84, 22, 46, 206, 158, 184, 57, 224, 118, 35, 26, 182, - ]; - - // 32 bytes message - let message: [u8; 32] = [ - 107, 97, 106, 100, 108, 102, 107, 106, 97, 108, 107, 102, 106, 97, 107, 108, 102, 106, 100, - 107, 108, 97, 100, 106, 102, 107, 108, 106, 97, 100, 115, 107, - ]; - - let result = logic - .ed25519_verify( - signature.len() as _, - signature.as_ptr() as _, - message.len() as _, - message.as_ptr() as _, - public_key.len() as _, - public_key.as_ptr() as _, - ) - .unwrap(); - - assert_eq!(result, 1); - - assert_costs(map! { - ExtCosts::read_memory_byte: 128, - ExtCosts::read_memory_base: 3, - ExtCosts::ed25519_verify_base: 1, - ExtCosts::ed25519_verify_byte: 32, - }); - - let result = logic - .ed25519_verify( - bad_signature.len() as _, - bad_signature.as_ptr() as _, - message.len() as _, - message.as_ptr() as _, - public_key.len() as _, - public_key.as_ptr() as _, - ) - .unwrap(); - - assert_eq!(result, 0); - - assert_costs(map! 
{ - ExtCosts::read_memory_byte: 128, - ExtCosts::read_memory_base: 3, - ExtCosts::ed25519_verify_base: 1, - ExtCosts::ed25519_verify_byte: 32, - }); - - let result = logic.ed25519_verify( - (signature.len() - 1) as _, - signature.as_ptr() as _, - message.len() as _, - message.as_ptr() as _, - public_key.len() as _, - public_key.as_ptr() as _, - ); - - assert_eq!( - result, - Err(VMLogicError::HostError(HostError::Ed25519VerifyInvalidInput { - msg: "invalid signature length".to_string() - })) - ); -} diff --git a/runtime/near-vm-logic/src/tests/mod.rs b/runtime/near-vm-logic/src/tests/mod.rs index 3085d8fe4d0..d14a862d0d5 100644 --- a/runtime/near-vm-logic/src/tests/mod.rs +++ b/runtime/near-vm-logic/src/tests/mod.rs @@ -1,5 +1,7 @@ mod alt_bn128; mod context; +#[cfg(feature = "protocol_feature_ed25519_verify")] +mod ed25519_verify; mod fixtures; mod gas_counter; mod helpers; From 9a42069cc12ada7ee4149d4af109fc80bb759653 Mon Sep 17 00:00:00 2001 From: Simonas Kazlauskas Date: Fri, 28 Oct 2022 16:55:05 +0000 Subject: [PATCH 049/103] docs: review docs/practices/docs.md (#7957) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wasn’t as thorough as with the previous PR, and many of these changes are probably more on the subjective side. --- docs/practices/docs.md | 100 ++++++++++++++++++++++------------------- 1 file changed, 54 insertions(+), 46 deletions(-) diff --git a/docs/practices/docs.md b/docs/practices/docs.md index acfba34f27c..282f7569ff8 100644 --- a/docs/practices/docs.md +++ b/docs/practices/docs.md @@ -1,30 +1,35 @@ # Documentation -This chapter describes nearcore's approach to documentation. In general, -there are three primary types of documentations to keep in mind: - -* **Protocol specification** ([source](https://github.com/near/NEPs), - [rendered](https://nomicon.io)) is a formal description of the NEAR protocol. - It can be used to implement alternative NEAR clients. 
-* **User docs** ([rendered](https://docs.near.org)) explain how to use the near - network. User docs are also split into documentation for validators (that is, - how to run your own near node) and documentation for smart contract - developers. -* **Internal development docs** ([rendered](https://near.github.io/nearcore/), - [source](https://github.com/near/nearcore/tree/master/docs)) is the book you - are reading right now! The target audience here are nearcore contributors. - +This chapter describes nearcore's approach to documentation. There are three +primary types of documentation to keep in mind: + +* [**The NEAR Protocol Specification**][nomicon] ([source][src.nomicon]) is the + formal description of the NEAR protocol. The reference nearcore implementation + and any other NEAR client implementations must follow this specification. +* [**User docs**][docs.near] ([source][src.docs.near]) explain what is NEAR and + how to participate in the network. In particular, they contain information + pertinent to the users of NEAR: validators and smart contract developers. +* [**Documentation for nearcore developers**][this] ([source][src.this]) is the + book you are reading right now! The target audience here are the contributors + to the main implementation of the NEAR protocol (nearcore). + +[docs.near]: https://docs.near.org +[src.docs.near]: https://github.com/near/docs +[nomicon]: https://nomicon.io +[src.nomicon]: https://github.com/near/NEPs +[this]: https://near.github.io/nearcore/ +[src.this]: https://github.com/near/nearcore/tree/master/docs ## Overview -The bulk of the internal docs is this book. If you want to write some kind of a -document, add it here! The [architecture](../architecture/) and -[practices](../practices/) chapter are intended for somewhat up-to-date -normative documents, but in the [misc](../misc/) anything goes. +The bulk of the internal docs is within this book. If you want to write some +kind of a document, add it here! 
The [architecture](../architecture/) and +[practices](../practices/) chapters are intended for somewhat up-to-date +normative documents. The [misc](../misc/) chapter holds everything else. -These are internal docs, not user-facing ones, so don't worry about proper -English, typos, or beautiful diagrams -- just write stuff! It can easily be -improved over time with pull requests. For docs, we use a light-weight review +This book is not intended for user-facing documentation, so don't worry about +proper English, typos, or beautiful diagrams -- just write stuff! It can easily +be improved over time with pull requests. For docs, we use a light-weight review process and try to merge any improvement as quickly as possible. Rather than blocking a PR on some stylistic changes, just merge it and submit a follow up. @@ -34,27 +39,28 @@ typos you spot! In addition to the book, we also have some "inline" documentation in the code. For Rust, it is customary to have a per-crate `README.md` file and include it as a doc comment via `#![doc = include_str!("../README.md")]` in `lib.rs`. We don't -*require* every `struct` and `function` to be documented, but we certainly -encourage documenting as much as possible. If you spend some time refactoring or -fixing a function, consider adding a doc comment (`///`) to it as a drive-by -improvement. +*require* every item to be documented, but we certainly encourage documenting as +much as possible. If you spend some time refactoring or fixing a function, +consider adding a doc comment (`///`) to it as a drive-by improvement. + +We currently don't render `rustdoc`, see [#7836]. -We currently don't render `rustdoc`, see -[#7836](https://github.com/near/nearcore/issues/7836). +[#7836]: https://github.com/near/nearcore/issues/7836 ## Book How To -We use mdBook to render a bunch of markdown files as a nice doc with table of -contents, search and themes. Full docs are -[here](https://rust-lang.github.io/mdBook/), but the basics are very simple. 
+We use mdBook to render a bunch of markdown files as a static website with table +of contents, search and themes. Full docs are [here][mdbook], but the basics are +very simple. -To add a new page: +[mdbook]: https://rust-lang.github.io/mdBook/ + +To add a new page to the book: 1. Add an `.md` file somewhere in the [`./docs`](https://github.com/near/nearcore/tree/master/docs) folder. -2. Add an entry to - [`SUMMARY.md`](https://github.com/near/nearcore/blob/master/docs/SUMMARY.md) - file. +2. Add a link to this page to the + [`SUMMARY.md`](https://github.com/near/nearcore/blob/master/docs/SUMMARY.md). 3. Submit a PR (again, we promise to merge it without much ceremony). The doc itself is vanilla markdown. @@ -67,23 +73,25 @@ $ cargo install mdbook $ mdbook serve --open ./docs ``` -This will convert `.md` file from the docs folder to `.html`, open a browser, -and start a file watcher to rebuild and reload on change. +This will generate the book from the docs folder, open it in a browser and +start a file watcher to rebuild the book every time the source files change. + +Note that GitHub's default rendering mostly works just as well, so you don't +need to go out of your way to preview your changes when drafting a page or +reviewing pull requests to this book. -Note that GitHub's default rendering mostly just works as well, so you don't -need special preview when reviewing pull requests to docs. +The book is deployed via the [book GitHub Action workflow][GHA]. This workflow +runs mdBook and then deploys the result to [GitHub Pages][GHP]. -The book deployed via this GitHub Action: -[.github/workflows/book.yml](https://github.com/near/nearcore/blob/master/.github/workflows/book.yml). -It just runs mdBook and then deploys the result to -https://near.github.io/nearcore/. 
+[GHA]: https://github.com/near/nearcore/blob/master/.github/workflows/book.yml +[GHP]: https://docs.github.com/en/pages/getting-started-with-github-pages/about-github-pages For internal docs, you often want to have pretty pictures. We don't currently have a recommended workflow, but here are some tips: -* Don't add binary media files to Git to avoid inflating repository size. - Rather, upload images as comments to this super-secret issue - [#7821](https://github.com/near/nearcore/issues/7821), and then link to +* Don't add binary media files to Git to avoid inflating repository size. + Rather, upload images as comments to this super-secret issue + [#7821](https://github.com/near/nearcore/issues/7821), and then link to the images as ``` @@ -92,7 +100,7 @@ have a recommended workflow, but here are some tips: Use single comment per page with multiple images. -* Google Doc is an OK way to create technical drawings, you can add a link to +* Google Docs is an OK way to create technical drawings, you can add a link to the doc with source to that secret issue as well. * There's some momentum around using mermaid.js for diagramming, and there's From 768bc8554aacec36360f66a9d55619b12951e124 Mon Sep 17 00:00:00 2001 From: Simonas Kazlauskas Date: Fri, 28 Oct 2022 17:10:48 +0000 Subject: [PATCH 050/103] docs: proof-read fast-builds nearcore book page (#7956) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This all started with me noticing a typo ("to slow”), but then I realized there are other places that could be reworded to flow smoother as well. 
--- docs/practices/fast_builds.md | 67 +++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/docs/practices/fast_builds.md b/docs/practices/fast_builds.md index bbc100dea6b..81180405489 100644 --- a/docs/practices/fast_builds.md +++ b/docs/practices/fast_builds.md @@ -1,34 +1,36 @@ # Fast Builds nearcore is implemented in Rust and is a fairly sizable project, so it takes a -while to build. This chapter collects various tips to make the process faster. +while to build. This chapter collects various tips to make the development +process faster. Optimizing build times is a bit of a black art, so please do benchmarks on your -machine to verify that the improvement work for you. Changing some configuration -and making some type, which prevents it from improving build times is an +machine to verify that the improvements work for you. Changing some configuration +and making a typo, which prevents it from improving build times is an extremely common failure mode! [Rust Perf Book](https://nnethercote.github.io/perf-book/compile-times.html) -contains a section on compilation time as well! +contains a section on compilation times as well! ## Release Builds and Link Time Optimization Obviously, `cargo build --release` is slower than `cargo build`. We enable full lto (link time optimization), so our `-r` builds are very slow, use a lot of -RAM, and don't take advantage full of parallelism. +RAM, and don't utilize the available parallelism fully. -As debug builds are much to slow at runtime for many purposes, we have a custom -profile `--profile quick-release` which is equivalent to `-r`, but doesn't do -`lto`. +As debug builds are much too slow at runtime for many purposes, we have a custom +profile `--profile quick-release` which is equivalent to `-r`, except that time +consuming options such as LTO are disabled. Use `--profile quick-release` when doing comparative benchmarking, or when -connecting a locally build node to a network. 
Use `-r` if you want to get +connecting a locally built node to a network. Use `-r` if you want to get absolute performance numbers. ## Linker -By default, `rustc` uses system's linker, which might be quite slow. Using `lld` -(LLVM linker) or `mold` (very new, very fast linker) is usually a big win. +By default, `rustc` uses the default system linker, which tends to be quite +slow. Using `lld` (LLVM linker) or `mold` (very new, very fast linker) provides +big wins for many setups. I don't know what's the official source of truth for using alternative linkers, I usually refer to [this @@ -47,21 +49,22 @@ lld itself can be installed with `sudo apt install lld`. ## Prebuilt RocksDB -By default, we compile RocksDB (a C++ project) from source, which takes a lot of -time. A faster alternative is to link to a prebuilt copy of RocksDB. This is a -huge win, especially if you clean `./target` directory frequently. +By default, we compile RocksDB (a C++ project) from source during the neard +build. By linking to a prebuilt copy of RocksDB this work can be avoided +entirely. This is a huge win, especially if you clean the `./target` directory +frequently. -To use prebuilt RocksDB set `ROCKSDB_LIB_DIR` environment variable to location -where `librocksdb.a` file is installed: +In order to use prebuilt RocksDB, set `ROCKSDB_LIB_DIR` environment variable to +a location containing `librocksdb.a`: ```console $ export ROCKSDB_LIB_DIR=/usr/lib/x86_64-linux-gnu $ cargo build -p neard ``` -Note that the system must provide a recent version of the library which, +Note, that the system must provide a recent version of the library which, depending on operating system you’re using, may require installing packages from -testing branches. For example, on Debian it requires installing +a testing branch. 
For example, on Debian it requires installing `librocksdb-dev` from `experimental` version: ```bash @@ -76,11 +79,13 @@ export ROCKSDB_LIB_DIR ## Global Compilation Cache -By default, Rust uses incremental compilation, with intermediate artifacts -stored in the project-local `./target` directory. +By default, Rust compiles incrementally, with the incremental cache and +intermediate outputs stored in the project-local `./target` directory. -[`sccache`](https://github.com/mozilla/sccache) utility can be used to add a -global compilation to the mix: +[`sccache`](https://github.com/mozilla/sccache) utility can be used to share +these artifacts between machines or checkouts within the same machine. `sccache` +works by intercepting calls to `rustc` and will fetch the cached outputs from +the global cache whenever possible. This tool can be set up as such: ```console $ cargo install sccache @@ -89,19 +94,19 @@ $ export SCCACHE_CACHE_SIZE="30G" $ cargo build -p neard ``` -`sccache` intercepts calls to `rustc` and pattern-matches compiler's command -line to get a cached result. +Refer to the [project’s README](https://github.com/mozilla/sccache) for further +configuration options. ## IDEs Are Bad For Environment Generally, the knobs in this section are controlled either via global -configuration in `~/.cargo/config` or environmental variables. +configuration in `~/.cargo/config` or environment variables. -Environmental variables are notoriously easy to lose, especially if you are -working both from a command line and from a graphical IDE. Double check that you -are not missing any of our build optimizations, the failure mode here is nasty, -as the stuff just takes longer to compile without givin any visual indication of -an error. +Environment variables are notoriously easy to lose, especially if you are +working both from a command line and graphical IDE. 
Double check that the +environment within which builds are executed is identical in order to avoid +nasty failure modes such as full cache invalidation when switching +from the CLI to an IDE or vice-versa. [`direnv`](https://direnv.net) sometimes can be used to conveniently manage -project-specific environmentalvariable. +project-specific environment variables. From 6b4dae1390cc7afeec1fb1a4a61aabfc7496f865 Mon Sep 17 00:00:00 2001 From: Jakob Meier Date: Fri, 28 Oct 2022 19:25:45 +0100 Subject: [PATCH 051/103] doc: how to add a new parameter (#7952) Add documentation on how new gas cost parameters should be added, as reference for anyone implementing new features. --- docs/architecture/gas/parameter_definition.md | 72 +++++++++++++++++-- 1 file changed, 67 insertions(+), 5 deletions(-) diff --git a/docs/architecture/gas/parameter_definition.md b/docs/architecture/gas/parameter_definition.md index 4ec8159346b..1c6bc30dfa5 100644 --- a/docs/architecture/gas/parameter_definition.md +++ b/docs/architecture/gas/parameter_definition.md @@ -1,13 +1,75 @@ # Parameter Definitions Gas parameters are a subset of runtime parameters that are defined in -[core/primitives/res/runtime_configs/parameters.txt](https://github.com/near/nearcore/blob/master/core/primitives/res/runtime_configs/parameters.txt). +[core/primitives/res/runtime_configs/parameters.txt](https://github.com/near/nearcore/blob/d0dc37bf81f7e7bde9c560403b085fae04108659/core/primitives/res/runtime_configs/parameters.txt). IMPORTANT: This is not the final list of parameters, it contains the base values which can be overwritten per protocol version. For example, -[53.txt](core/primitives/res/runtime_configs/53.txt) changes several parameters +[53.txt](https://github.com/near/nearcore/blob/d0dc37bf81f7e7bde9c560403b085fae04108659/core/primitives/res/runtime_configs/53.txt) +changes several parameters starting from version 53. 
To see all parameter values for a specific version at -once, check out list of JSON snapshots generated in this directory: -[core/primitives/src/runtime/snapshots](https://github.com/near/nearcore/blob/master/core/primitives/src/runtime/snapshots). +once, check out the list of JSON snapshots generated in this directory: +[core/primitives/src/runtime/snapshots](https://github.com/near/nearcore/blob/d0dc37bf81f7e7bde9c560403b085fae04108659/core/primitives/src/runtime/snapshots). - + +## How to Add a New Parameter + +First and foremost, if you are feeling lost, open a topic in our Zulip chat +([pagoda/contract-runtime](https://near.zulipchat.com/#narrow/stream/295306-pagoda.2Fcontract-runtime)). +We are here to help. + +### Principles +Before adding anything, please review the basic principles for gas parameters. +- A parameter must correspond to a clearly defined workload. +- When the workload is scalable by a factor `N` that depends on user input, + likely it will require a base parameter and a second parameter that is + multiplied by `N`. (Example: `N` = number of bytes when reading a value from + storage.) +- Charge gas before executing the workload. +- Parameters should be independent from specific implementation choices in + nearcore. +- Ideally, contract developers can easily understand what the cost is simply by + reading the name in a gas profile. + +The section on [Gas Profiles](./gas_profile.md#charging-gas) explains how to +charge gas, please also take that into consideration when defining a new +parameter. + +### Necessary Code Changes +Adding the parameter in code involves several steps. +1. Define the parameter by adding it to the list in `core/primitives/res/runtime_configs/parameters.txt`. +2. Update the Rust view of parameters by adding a variant to `enum Parameter` + in `core/primitives-core/src/parameter.rs`. In the same file, update + `enum FeeParameter` if you add an action cost or update `ext_costs()` + if you add a cost inside function calls. 
+3. Update `RuntimeConfig`, the configuration used to reference parameters in + code. Depending on the type of parameter, you will need to update + `RuntimeFeesConfig` (for action costs) or `ExtCostsConfig` (for gas costs). +4. Update the list used for gas profiles. This is defined by `enum Cost` in + `core/primitives-core/src/profile.rs`. You need to add a variant to either + `enum ActionCosts` or `enum ExtCost`. Please also update `fn index()` that + maps each profile entry to a unique position in serialized gas profiles. +5. The parameter should be available to use in the code section where you need it. Now + is a good time to ensure `cargo check` and `cargo test --no-run` pass. Most + likely you have to update some testing code, such as + `ExtCostsConfig::test()`. +6. To merge your changes into nearcore, you will have to hide your parameter + behind a feature flag. Add the feature to the `Cargo.toml` of each crate + touched in steps 3 and 4 and hide the code behind `#[cfg(feature = + "protocol_feature_MY_NEW_FEATURE")]`. Do not hide code in step 2 so that + non-nightly builds can still read `parameters.txt`. Also add your feature as + a dependency on `nightly` in `core/primitives/Cargo.toml` to make sure it + gets included when compiling for nightly. After that, check `cargo check` and + `cargo test --no-run` with and without `features=nightly`. + +### What Gas Value Should the Parameter Have? +For a first draft, the exact gas value used in the parameter is not crucial. +Make sure the right set of parameters exists and try to set a number that roughly +makes sense. This should be enough to enable discussions on the NEP around +feasibility and usefulness of the proposed feature. If you are not sure, a good +rule of thumb is 0.1 Tgas for each disk operation and at least 1 Tgas for each +ms of CPU time. Then round it up generously. + +The value will have to be refined later. This is usually the last step, after +the implementation is complete and reviewed. 
Have a look at the section on +[estimating gas parameters](./estimator.md) of the book. From c71683ab5d0a700d8d0ac49015e58e6cba00404c Mon Sep 17 00:00:00 2001 From: Jakob Meier Date: Fri, 28 Oct 2022 20:15:04 +0100 Subject: [PATCH 052/103] feat: trie cache configuration (#7578) Make the shard cache max total bytes configurable. Also add separate configuration for view caches. This deprecates the old format for configuring cache capacity. The old format will still work for now but values in the new format will overwrite any values set in the old format. Example of the new format, that sets all normal caches to 50MB, aurora's shard to 100MB, shard 3 to 3GB, and view caches to 30MB: ```json { "trie_cache": { "default_max_bytes": 50000000, "per_shard_max_bytes": { "s1.v1": 100000000, "s3.v1": 3000000000 } }, "view_trie_cache": { "default_max_bytes": 30000000 } } ``` resolves #7564 --- CHANGELOG.md | 5 ++ core/primitives/src/shard_layout.rs | 101 +++++++++++++++++++++++- core/store/src/config.rs | 52 +++++++++++-- core/store/src/trie/config.rs | 116 ++++++++++------------------ core/store/src/trie/mod.rs | 1 + core/store/src/trie/trie_storage.rs | 47 ++++++++++- core/store/src/trie/trie_tests.rs | 2 +- nearcore/src/runtime/mod.rs | 4 +- 8 files changed, 243 insertions(+), 85 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 745d1ef9c85..b812b4a2438 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,6 +52,11 @@ a hard limit but instead sets a memory consumption limit. For large trie nodes, the limits are close to equivalent. For small values, there can now fit more in the cache than previously. + [#7749](https://github.com/near/nearcore/pull/7749) +* New options `store.trie_cache` and `store.view_trie_cache` in `config.json` + to set limits on the trie cache. Deprecates the never announced + `store.trie_cache_capacities` option which was mentioned in previous change. 
+ [#7578](https://github.com/near/nearcore/pull/7578) * Tracing of work across actix workers within a process: [#7866](https://github.com/near/nearcore/pull/7866), [#7819](https://github.com/near/nearcore/pull/7819), diff --git a/core/primitives/src/shard_layout.rs b/core/primitives/src/shard_layout.rs index 0aa49cc87b0..ad460595df9 100644 --- a/core/primitives/src/shard_layout.rs +++ b/core/primitives/src/shard_layout.rs @@ -1,4 +1,5 @@ use std::cmp::Ordering::Greater; +use std::{fmt, str}; use byteorder::{LittleEndian, ReadBytesExt}; use serde::{Deserialize, Serialize}; @@ -287,7 +288,7 @@ fn is_top_level_account(top_account: &AccountId, account: &AccountId) -> bool { } /// ShardUId is an unique representation for shards from different shard layout -#[derive(Serialize, Deserialize, Hash, Clone, Debug, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Hash, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub struct ShardUId { pub version: ShardVersion, pub shard_id: u32, @@ -355,6 +356,104 @@ pub fn get_block_shard_uid_rev( Ok((block_hash, shard_id)) } +impl fmt::Display for ShardUId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "s{}.v{}", self.shard_id, self.version) + } +} + +impl fmt::Debug for ShardUId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self, f) + } +} + +impl str::FromStr for ShardUId { + type Err = String; + + fn from_str(s: &str) -> Result { + let (shard_str, version_str) = s + .split_once(".") + .ok_or_else(|| format!("shard version and number must be separated by \".\""))?; + + let version = version_str + .strip_prefix("v") + .ok_or_else(|| format!("shard version must start with \"v\""))? 
+ .parse::() + .map_err(|e| format!("shard version after \"v\" must be a number, {e}"))?; + + let shard_str = + shard_str.strip_prefix("s").ok_or_else(|| format!("shard id must start with \"s\""))?; + let shard_id = shard_str + .parse::() + .map_err(|e| format!("shard id after \"s\" must be a number, {e}"))?; + + Ok(ShardUId { shard_id, version }) + } +} + +impl<'de> serde::Deserialize<'de> for ShardUId { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + deserializer.deserialize_any(ShardUIdVisitor) + } +} + +impl serde::Serialize for ShardUId { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + serializer.serialize_str(&self.to_string()) + } +} + +struct ShardUIdVisitor; +impl<'de> serde::de::Visitor<'de> for ShardUIdVisitor { + type Value = ShardUId; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!( + formatter, + "either string format of `ShardUId` like s0v1 for shard 0 version 1, or a map" + ) + } + + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, + { + v.parse().map_err(|e| E::custom(e)) + } + + fn visit_map(self, mut map: A) -> Result + where + A: serde::de::MapAccess<'de>, + { + // custom struct deserialization for backwards compatibility + // TODO(#7894): consider removing this code after checking + // `ShardUId` is nowhere serialized in the old format + let mut version = None; + let mut shard_id = None; + + while let Some((field, value)) = map.next_entry()? 
{ + match field { + "version" => version = Some(value), + "shard_id" => shard_id = Some(value), + _ => return Err(serde::de::Error::unknown_field(field, &["version", "shard_id"])), + } + } + + match (version, shard_id) { + (None, _) => Err(serde::de::Error::missing_field("version")), + (_, None) => Err(serde::de::Error::missing_field("shard_id")), + (Some(version), Some(shard_id)) => Ok(ShardUId { version, shard_id }), + } + } +} + #[cfg(test)] mod tests { use crate::shard_layout::{account_id_to_shard_id, ShardLayout, ShardUId}; diff --git a/core/store/src/config.rs b/core/store/src/config.rs index 4bcc9326c52..fef5cc45640 100644 --- a/core/store/src/config.rs +++ b/core/store/src/config.rs @@ -1,4 +1,7 @@ use near_primitives::shard_layout::ShardUId; +use std::{collections::HashMap, iter::FromIterator}; + +use crate::trie::DEFAULT_SHARD_CACHE_TOTAL_SIZE_LIMIT; #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] #[serde(default)] @@ -37,12 +40,15 @@ pub struct StoreConfig { /// the performance of the storage pub block_size: bytesize::ByteSize, - /// Trie cache capacities - /// Default value: ShardUId {version: 1, shard_id: 3} -> 45_000_000 - /// We're still experimenting with this parameter and it seems decreasing its value can improve - /// the performance of the storage + /// DEPRECATED: use `trie_cache` instead. + /// TODO(#7894): Remove in version >1.31 pub trie_cache_capacities: Vec<(ShardUId, u64)>, + /// Trie cache configuration per shard for normal (non-view) caches. + pub trie_cache: TrieCacheConfig, + /// Trie cache configuration per shard for view caches. + pub view_trie_cache: TrieCacheConfig, + /// Enable fetching account and access key data ahead of time to avoid IO latency. pub enable_receipt_prefetching: bool, @@ -171,7 +177,22 @@ impl Default for StoreConfig { // we use it since then. 
block_size: bytesize::ByteSize::kib(16), - trie_cache_capacities: vec![(ShardUId { version: 1, shard_id: 3 }, 45_000_000)], + // deprecated + trie_cache_capacities: vec![], + + trie_cache: TrieCacheConfig { + default_max_bytes: DEFAULT_SHARD_CACHE_TOTAL_SIZE_LIMIT, + // Temporary solution to make contracts with heavy trie access + // patterns on shard 3 more stable. It was chosen by the estimation + // of the largest contract storage size we are aware as of 23/08/2022. + // Consider removing after implementing flat storage. (#7327) + per_shard_max_bytes: HashMap::from_iter([( + ShardUId { version: 1, shard_id: 3 }, + 3_000_000_000, + )]), + }, + view_trie_cache: TrieCacheConfig::default(), + enable_receipt_prefetching: true, sweat_prefetch_receivers: vec![ "token.sweat".to_owned(), @@ -222,3 +243,24 @@ impl Default for MigrationSnapshot { Self::Enabled(true) } } + +#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] +#[serde(default)] +pub struct TrieCacheConfig { + /// Limit the memory consumption of the trie cache per shard. + /// + /// This is an approximate limit that attempts to factor in data structure + /// overhead also. It is supposed to be fairly accurate in the limit. + pub default_max_bytes: u64, + /// Overwrites `default_max_bytes` for specific shards. 
+ pub per_shard_max_bytes: HashMap, +} + +impl Default for TrieCacheConfig { + fn default() -> Self { + Self { + default_max_bytes: DEFAULT_SHARD_CACHE_TOTAL_SIZE_LIMIT, + per_shard_max_bytes: Default::default(), + } + } +} diff --git a/core/store/src/trie/config.rs b/core/store/src/trie/config.rs index e6025117f46..00913575cb7 100644 --- a/core/store/src/trie/config.rs +++ b/core/store/src/trie/config.rs @@ -1,22 +1,15 @@ +use crate::config::TrieCacheConfig; use crate::trie::trie_storage::TrieCacheInner; use crate::StoreConfig; -use near_primitives::shard_layout::ShardUId; use near_primitives::types::AccountId; -use std::collections::HashMap; use std::str::FromStr; -use tracing::error; +use tracing::{error, warn}; -/// Default number of cache entries. -/// It was chosen to fit into RAM well. RAM spend on trie cache should not exceed 50_000 * 4 (number of shards) * -/// TRIE_LIMIT_CACHED_VALUE_SIZE * 2 (number of caches - for regular and view client) = 0.4 GB. -/// In our tests on a single shard, it barely occupied 40 MB, which is dominated by state cache size -/// with 512 MB limit. The total RAM usage for a single shard was 1 GB. -const TRIE_DEFAULT_SHARD_CACHE_SIZE: u64 = if cfg!(feature = "no_cache") { 1 } else { 50000 }; - -/// Default total size of values which may simultaneously exist the cache. -/// It is chosen by the estimation of the largest contract storage size we are aware as of 23/08/2022. -const DEFAULT_SHARD_CACHE_TOTAL_SIZE_LIMIT: u64 = - if cfg!(feature = "no_cache") { 1 } else { 3_000_000_000 }; +/// Default memory limit, if nothing else is configured. +/// It is chosen to correspond roughly to the old limit, which was +/// 50k entries * TRIE_LIMIT_CACHED_VALUE_SIZE. +pub(crate) const DEFAULT_SHARD_CACHE_TOTAL_SIZE_LIMIT: u64 = + if cfg!(feature = "no_cache") { 1 } else { 50_000_000 }; /// Capacity for the deletions queue. /// It is chosen to fit all hashes of deleted nodes for 3 completely full blocks. 
@@ -30,8 +23,8 @@ const TRIE_LIMIT_CACHED_VALUE_SIZE: usize = 1000; /// Stores necessary configuration for the creation of tries. #[derive(Default)] pub struct TrieConfig { - pub shard_cache_config: ShardCacheConfig, - pub view_shard_cache_config: ShardCacheConfig, + pub shard_cache_config: TrieCacheConfig, + pub view_shard_cache_config: TrieCacheConfig, pub enable_receipt_prefetching: bool, /// Configured accounts will be prefetched as SWEAT token account, if predecessor is listed as sender. @@ -40,23 +33,22 @@ pub struct TrieConfig { pub sweat_prefetch_senders: Vec, } -pub struct ShardCacheConfig { - /// Shard cache capacity in number of trie nodes. - pub default_max_entries: u64, - /// Limits the memory consumption for the cache. - pub default_max_total_bytes: u64, - /// Overrides `default_max_entries` per shard. - pub override_max_entries: HashMap, - /// Overrides `default_max_total_bytes` per shard. - pub override_max_total_bytes: HashMap, -} - impl TrieConfig { - pub fn from_config(config: &StoreConfig) -> Self { - let mut this = Self::default(); - this.shard_cache_config - .override_max_entries - .extend(config.trie_cache_capacities.iter().cloned()); + /// Create a new `TrieConfig` with default values or the values specified in `StoreConfig`. 
+ pub fn from_store_config(config: &StoreConfig) -> Self { + let mut this = TrieConfig::default(); + + if !config.trie_cache_capacities.is_empty() { + warn!(target: "store", "`trie_cache_capacities` is deprecated, use `trie_cache` and `view_trie_cache` instead"); + for (shard_uid, capacity) in &config.trie_cache_capacities { + let bytes_limit = Self::deprecated_num_entry_to_memory_limit(*capacity); + this.shard_cache_config.per_shard_max_bytes.insert(*shard_uid, bytes_limit); + } + } + + this.shard_cache_config = config.trie_cache.clone(); + this.view_shard_cache_config = config.view_trie_cache.clone(); + this.enable_receipt_prefetching = config.enable_receipt_prefetching; for account in &config.sweat_prefetch_receivers { match AccountId::from_str(account) { @@ -70,13 +62,8 @@ impl TrieConfig { Err(e) => error!(target: "config", "invalid account id {account}: {e}"), } } - this - } - /// Shard cache capacity in total bytes. - pub fn shard_cache_total_size_limit(&self, shard_uid: ShardUId, is_view: bool) -> u64 { - if is_view { &self.view_shard_cache_config } else { &self.shard_cache_config } - .total_size_limit(shard_uid) + this } /// Size limit in bytes per single value for caching in shard caches. @@ -94,42 +81,23 @@ impl TrieConfig { pub fn deletions_queue_capacity(&self) -> usize { DEFAULT_SHARD_CACHE_DELETIONS_QUEUE_CAPACITY } -} - -impl ShardCacheConfig { - // TODO(#7894): Remove this when `trie_cache_capacities` is removed from config. - fn capacity(&self, shard_uid: ShardUId) -> u64 { - self.override_max_entries.get(&shard_uid).cloned().unwrap_or(self.default_max_entries) - } - - fn total_size_limit(&self, shard_uid: ShardUId) -> u64 { - let explicit_limit = self - .override_max_total_bytes - .get(&shard_uid) - .copied() - .unwrap_or(self.default_max_total_bytes); - // As long as `trie_cache_capacities` is a config option, it should be respected. - // We no longer commit to a hard limit on this. 
But we make sure that the old - // worst-case assumption of how much memory would be consumed still works. - // Specifically, the old calculation ignored `PER_ENTRY_OVERHEAD` and used - // `max_cached_value_size()` only to figure out a good value for how many - // nodes we want in the cache at most. - // This implicit limit should result in the same may number of nodes and same max memory - // consumption as the old config. - // TODO(#7894): Remove this when `trie_cache_capacities` is removed from config. - let implicit_limit = self.capacity(shard_uid) - * (TrieCacheInner::PER_ENTRY_OVERHEAD + TrieConfig::max_cached_value_size() as u64); - explicit_limit.min(implicit_limit) - } -} -impl Default for ShardCacheConfig { - fn default() -> Self { - Self { - default_max_entries: TRIE_DEFAULT_SHARD_CACHE_SIZE, - default_max_total_bytes: DEFAULT_SHARD_CACHE_TOTAL_SIZE_LIMIT, - override_max_entries: HashMap::default(), - override_max_total_bytes: HashMap::default(), - } + /// Given a number of max entries in the old config format, calculate how + /// many bytes the limit should be set to such that AT LEAST THE SAME NUMBER + /// can fit. + /// + /// TODO(#7894): Remove this when `trie_cache_capacities` is removed from config. + /// + /// As long as `trie_cache_capacities` is a config option, it should be respected. + /// We no longer commit to a hard limit on this. But we make sure that the old + /// worst-case assumption of how much memory would be consumed still works. + /// Specifically, the old calculation ignored `PER_ENTRY_OVERHEAD` and used + /// `max_cached_value_size()` only to figure out a good value for how many + /// nodes we want in the cache at most. + /// This implicit limit should result in the same min number of nodes and + /// same max memory consumption as the old config. 
+ pub(crate) fn deprecated_num_entry_to_memory_limit(max_num_entries: u64) -> u64 { + max_num_entries + * (TrieCacheInner::PER_ENTRY_OVERHEAD + TrieConfig::max_cached_value_size() as u64) } } diff --git a/core/store/src/trie/mod.rs b/core/store/src/trie/mod.rs index 44af67260f5..346348b1fd5 100644 --- a/core/store/src/trie/mod.rs +++ b/core/store/src/trie/mod.rs @@ -17,6 +17,7 @@ use near_primitives::types::{StateRoot, StateRootNode}; use crate::flat_state::FlatState; pub use crate::trie::config::TrieConfig; +pub(crate) use crate::trie::config::DEFAULT_SHARD_CACHE_TOTAL_SIZE_LIMIT; use crate::trie::insert_delete::NodesStorage; use crate::trie::iterator::TrieIterator; pub use crate::trie::nibble_slice::NibbleSlice; diff --git a/core/store/src/trie/trie_storage.rs b/core/store/src/trie/trie_storage.rs index 2c85d01f52f..cf704e2f136 100644 --- a/core/store/src/trie/trie_storage.rs +++ b/core/store/src/trie/trie_storage.rs @@ -233,7 +233,13 @@ pub struct TrieCache(pub(crate) Arc>); impl TrieCache { pub fn new(config: &TrieConfig, shard_uid: ShardUId, is_view: bool) -> Self { - let total_size_limit = config.shard_cache_total_size_limit(shard_uid, is_view); + let cache_config = + if is_view { &config.view_shard_cache_config } else { &config.shard_cache_config }; + let total_size_limit = cache_config + .per_shard_max_bytes + .get(&shard_uid) + .copied() + .unwrap_or(cache_config.default_max_bytes); let queue_capacity = config.deletions_queue_capacity(); Self(Arc::new(Mutex::new(TrieCacheInner::new( queue_capacity, @@ -664,7 +670,10 @@ mod bounded_queue_tests { #[cfg(test)] mod trie_cache_tests { use crate::trie::trie_storage::TrieCacheInner; + use crate::{StoreConfig, TrieCache, TrieConfig}; use near_primitives::hash::hash; + use near_primitives::shard_layout::ShardUId; + use near_primitives::types::ShardId; fn put_value(cache: &mut TrieCacheInner, value: &[u8]) { cache.put(hash(value), value.into()); @@ -733,4 +742,40 @@ mod trie_cache_tests { 
assert!(!cache.cache.contains(&hash(&[2, 3, 4]))); assert!(cache.cache.contains(&hash(&[3, 4, 5]))); } + + /// Check that setting from `StoreConfig` are applied. + #[test] + fn test_trie_config() { + let mut store_config = StoreConfig::default(); + + const DEFAULT_SIZE: u64 = 1; + const S0_SIZE: u64 = 2; + const DEFAULT_VIEW_SIZE: u64 = 3; + const S0_VIEW_SIZE: u64 = 4; + + let s0 = ShardUId::single_shard(); + store_config.trie_cache.default_max_bytes = DEFAULT_SIZE; + store_config.trie_cache.per_shard_max_bytes.insert(s0, S0_SIZE); + store_config.view_trie_cache.default_max_bytes = DEFAULT_VIEW_SIZE; + store_config.view_trie_cache.per_shard_max_bytes.insert(s0, S0_VIEW_SIZE); + let trie_config = TrieConfig::from_store_config(&store_config); + + check_cache_size(&trie_config, 1, false, DEFAULT_SIZE); + check_cache_size(&trie_config, 0, false, S0_SIZE); + check_cache_size(&trie_config, 1, true, DEFAULT_VIEW_SIZE); + check_cache_size(&trie_config, 0, true, S0_VIEW_SIZE); + } + + #[track_caller] + fn check_cache_size( + trie_config: &TrieConfig, + shard_id: ShardId, + is_view: bool, + expected_size: u64, + ) { + let shard_uid = ShardUId { version: 0, shard_id: shard_id as u32 }; + let trie_cache = TrieCache::new(&trie_config, shard_uid, is_view); + assert_eq!(expected_size, trie_cache.0.lock().unwrap().total_size_limit,); + assert_eq!(is_view, trie_cache.0.lock().unwrap().is_view,); + } } diff --git a/core/store/src/trie/trie_tests.rs b/core/store/src/trie/trie_tests.rs index 0b17a318866..bbb2460c6c0 100644 --- a/core/store/src/trie/trie_tests.rs +++ b/core/store/src/trie/trie_tests.rs @@ -348,7 +348,7 @@ mod caching_storage_tests { let shard_uid = ShardUId::single_shard(); let store = create_store_with_values(&values, shard_uid); let mut trie_config = TrieConfig::default(); - trie_config.shard_cache_config.override_max_total_bytes.insert(shard_uid, shard_cache_size); + trie_config.shard_cache_config.per_shard_max_bytes.insert(shard_uid, shard_cache_size); let 
trie_cache = TrieCache::new(&trie_config, shard_uid, false); let trie_caching_storage = TrieCachingStorage::new(store, trie_cache.clone(), shard_uid, false, None); diff --git a/nearcore/src/runtime/mod.rs b/nearcore/src/runtime/mod.rs index 70492930b3c..10c4ce19aad 100644 --- a/nearcore/src/runtime/mod.rs +++ b/nearcore/src/runtime/mod.rs @@ -96,8 +96,6 @@ pub struct NightshadeRuntime { impl NightshadeRuntime { pub fn from_config(home_dir: &Path, store: Store, config: &NearConfig) -> Self { - let trie_config = TrieConfig::from_config(&config.config.store); - Self::new( home_dir, store, @@ -107,7 +105,7 @@ impl NightshadeRuntime { config.client_config.max_gas_burnt_view, None, config.config.gc.gc_num_epochs_to_keep(), - trie_config, + TrieConfig::from_store_config(&config.config.store), ) } From 7dff269a519488060848f19feff388b58e3d6e29 Mon Sep 17 00:00:00 2001 From: Alex Kladov Date: Mon, 31 Oct 2022 11:42:09 +0000 Subject: [PATCH 053/103] refactor: cleanup ed25519_verify (#7955) * Align doc comment with other host functions (not that we are super-consistent here) * Align code style with other host functions * Reduce some duplication from tests * Add tricky test-cases where a value of the wrong length is read from the register. https://github.com/near/nearcore/issues/7567 --- runtime/near-vm-logic/src/logic.rs | 100 ++-- .../near-vm-logic/src/tests/ed25519_verify.rs | 554 ++++++++++-------- 2 files changed, 361 insertions(+), 293 deletions(-) diff --git a/runtime/near-vm-logic/src/logic.rs b/runtime/near-vm-logic/src/logic.rs index 53bce2b9b26..f6438e40a41 100644 --- a/runtime/near-vm-logic/src/logic.rs +++ b/runtime/near-vm-logic/src/logic.rs @@ -1122,71 +1122,75 @@ impl<'a> VMLogic<'a> { } /// Verify an ED25519 signature given a message and a public key. 
- /// # Returns - /// - 1 meaning the boolean expression true to encode that the signature was properly verified - /// - 0 meaning the boolean expression false to encode that the signature failed to be verified /// - /// # Cost + /// Returns a bool indicating success (1) or failure (0) as a `u64`. + /// + /// # Errors /// - /// Each input can either be in memory or in a register. Set the length of the input to `u64::MAX` - /// to declare that the input is a register number and not a pointer. - /// Each input has a gas cost input_cost(num_bytes) that depends on whether it is from memory - /// or from a register. It is either read_memory_base + num_bytes * read_memory_byte in the - /// former case or read_register_base + num_bytes * read_register_byte in the latter. This function - /// is labeled as `input_cost` below. + /// * If the public key's size is not equal to 32, or signature size is not + /// equal to 64, returns [HostError::Ed25519VerifyInvalidInput]. + /// * If any of the signature, message or public key arguments are out of + /// memory bounds, returns [`HostError::MemoryAccessViolation`] /// - /// `input_cost(num_bytes_signature) + input_cost(num_bytes_message) + input_cost(num_bytes_public_key) + - /// ed25519_verify_base + ed25519_verify_byte * num_bytes_message` + /// # Cost /// - /// # Error + /// Each input can either be in memory or in a register. Set the length of + /// the input to `u64::MAX` to declare that the input is a register number + /// and not a pointer. Each input has a gas cost input_cost(num_bytes) that + /// depends on whether it is from memory or from a register. It is either + /// read_memory_base + num_bytes * read_memory_byte in the former case or + /// read_register_base + num_bytes * read_register_byte in the latter. This + /// function is labeled as `input_cost` below. /// - /// If the public key's size is not equal to 32 returns [HostError::Ed25519VerifyInvalidInput]. 
- /// If the signature size is not equal to 64 returns [HostError::Ed25519VerifyInvalidInput]. - + /// `input_cost(num_bytes_signature) + input_cost(num_bytes_message) + + /// input_cost(num_bytes_public_key) + ed25519_verify_base + + /// ed25519_verify_byte * num_bytes_message` #[cfg(feature = "protocol_feature_ed25519_verify")] pub fn ed25519_verify( &mut self, - sig_len: u64, - sig_ptr: u64, - msg_len: u64, - msg_ptr: u64, - pub_key_len: u64, - pub_key_ptr: u64, + signature_len: u64, + signature_ptr: u64, + message_len: u64, + message_ptr: u64, + public_key_len: u64, + public_key_ptr: u64, ) -> Result { - use ed25519_dalek::{PublicKey, Signature, Verifier, PUBLIC_KEY_LENGTH, SIGNATURE_LENGTH}; + use ed25519_dalek::Verifier; self.gas_counter.pay_base(ed25519_verify_base)?; - let signature_array = self.get_vec_from_memory_or_register(sig_ptr, sig_len)?; - if signature_array.len() != SIGNATURE_LENGTH { - return Err(VMLogicError::HostError(HostError::Ed25519VerifyInvalidInput { - msg: "invalid signature length".to_string(), - })); - } - - let signature = match Signature::from_bytes(&signature_array) { - Ok(signature) => signature, - Err(_) => return Ok(0), + let signature: ed25519_dalek::Signature = { + let vec = self.get_vec_from_memory_or_register(signature_ptr, signature_len)?; + if vec.len() != ed25519_dalek::SIGNATURE_LENGTH { + return Err(VMLogicError::HostError(HostError::Ed25519VerifyInvalidInput { + msg: "invalid signature length".to_string(), + })); + } + match ed25519_dalek::Signature::from_bytes(&vec) { + Ok(signature) => signature, + Err(_) => return Ok(false as u64), + } }; - let msg = self.get_vec_from_memory_or_register(msg_ptr, msg_len)?; - let num_bytes = msg.len(); - self.gas_counter.pay_per(ed25519_verify_byte, num_bytes as _)?; + let message = self.get_vec_from_memory_or_register(message_ptr, message_len)?; + self.gas_counter.pay_per(ed25519_verify_byte, message.len() as u64)?; - let pub_key_array = 
self.get_vec_from_memory_or_register(pub_key_ptr, pub_key_len)?; - if pub_key_array.len() != PUBLIC_KEY_LENGTH { - return Err(VMLogicError::HostError(HostError::Ed25519VerifyInvalidInput { - msg: "invalid public key length".to_string(), - })); - } - let pub_key = match PublicKey::from_bytes(&pub_key_array) { - Ok(pub_key) => pub_key, - Err(_) => return Ok(0), + let public_key: ed25519_dalek::PublicKey = { + let vec = self.get_vec_from_memory_or_register(public_key_ptr, public_key_len)?; + if vec.len() != ed25519_dalek::PUBLIC_KEY_LENGTH { + return Err(VMLogicError::HostError(HostError::Ed25519VerifyInvalidInput { + msg: "invalid public key length".to_string(), + })); + } + match ed25519_dalek::PublicKey::from_bytes(&vec) { + Ok(public_key) => public_key, + Err(_) => return Ok(false as u64), + } }; - match pub_key.verify(&msg, &signature) { - Err(_) => Ok(0), - Ok(()) => Ok(1), + match public_key.verify(&message, &signature) { + Err(_) => Ok(false as u64), + Ok(()) => Ok(true as u64), } } diff --git a/runtime/near-vm-logic/src/tests/ed25519_verify.rs b/runtime/near-vm-logic/src/tests/ed25519_verify.rs index edc04d02fd3..cb563ab08f1 100644 --- a/runtime/near-vm-logic/src/tests/ed25519_verify.rs +++ b/runtime/near-vm-logic/src/tests/ed25519_verify.rs @@ -1,288 +1,352 @@ use crate::tests::fixtures::get_context; use crate::tests::helpers::*; use crate::tests::vm_logic_builder::VMLogicBuilder; -use crate::VMLogic; use crate::{map, ExtCosts}; use near_vm_errors::HostError; use near_vm_errors::VMLogicError; - use std::collections::HashMap; -fn create_signature() -> [u8; 64] { - [ - 145, 193, 203, 18, 114, 227, 14, 117, 33, 213, 121, 66, 130, 14, 25, 4, 36, 120, 46, 142, - 226, 215, 7, 66, 122, 112, 97, 30, 249, 135, 61, 165, 221, 249, 252, 23, 105, 40, 56, 70, - 31, 152, 236, 141, 154, 122, 207, 20, 75, 118, 79, 90, 168, 6, 221, 122, 213, 29, 126, 196, - 216, 104, 191, 6, - ] -} +const SIGNATURE: [u8; 64] = [ + 145, 193, 203, 18, 114, 227, 14, 117, 33, 213, 121, 66, 130, 
14, 25, 4, 36, 120, 46, 142, 226, + 215, 7, 66, 122, 112, 97, 30, 249, 135, 61, 165, 221, 249, 252, 23, 105, 40, 56, 70, 31, 152, + 236, 141, 154, 122, 207, 20, 75, 118, 79, 90, 168, 6, 221, 122, 213, 29, 126, 196, 216, 104, + 191, 6, +]; -fn create_public_key() -> [u8; 32] { - [ - 32, 122, 6, 120, 146, 130, 30, 37, 215, 112, 241, 251, 160, 196, 124, 17, 255, 75, 129, 62, - 84, 22, 46, 206, 158, 184, 57, 224, 118, 35, 26, 182, - ] -} +const BAD_SIGNATURE: [u8; 64] = [1; 64]; + +// create a forged signature with the `s` scalar not properly reduced +// https://docs.rs/ed25519/latest/src/ed25519/lib.rs.html#302 +const FORGED_SIGNATURE: [u8; 64] = { + let mut sig = SIGNATURE; + sig[63] = 0b1110_0001; + sig +}; + +const PUBLIC_KEY: [u8; 32] = [ + 32, 122, 6, 120, 146, 130, 30, 37, 215, 112, 241, 251, 160, 196, 124, 17, 255, 75, 129, 62, 84, + 22, 46, 206, 158, 184, 57, 224, 118, 35, 26, 182, +]; + +// create a forged public key to force a PointDecompressionError +// https://docs.rs/ed25519-dalek/latest/src/ed25519_dalek/public.rs.html#142 +const FORGED_PUBLIC_KEY: [u8; 32] = { + let mut key = PUBLIC_KEY; + key[31] = 0b1110_0001; + key +}; + +// 32 bytes message +const MESSAGE: [u8; 32] = [ + 107, 97, 106, 100, 108, 102, 107, 106, 97, 108, 107, 102, 106, 97, 107, 108, 102, 106, 100, + 107, 108, 97, 100, 106, 102, 107, 108, 106, 97, 100, 115, 107, +]; #[track_caller] fn check_ed25519_verify( - logic: &mut VMLogic, - signature_len: usize, + signature_len: u64, signature: &[u8], - message_len: usize, + message_len: u64, message: &[u8], - public_key_len: usize, + public_key_len: u64, public_key: &[u8], - want: Result, + want: Result, want_costs: HashMap, ) { - let result = logic.ed25519_verify( - signature_len as _, - signature.as_ptr() as _, - message_len as _, - message.as_ptr() as _, - public_key_len as _, - public_key.as_ptr() as _, - ); - - assert_eq!(want, result); - assert_costs(want_costs); -} - -#[test] -fn test_ed25519_verify_behavior_and_errors() { let mut 
logic_builder = VMLogicBuilder::default(); let mut logic = logic_builder.build(get_context(vec![], false)); - let signature = create_signature(); - let bad_signature: [u8; 64] = [1; 64]; - - let mut forged_signature = signature.clone(); - // create a forged signature with the `s` scalar not properly reduced - // https://docs.rs/ed25519/latest/src/ed25519/lib.rs.html#302 - forged_signature[63] = 0b1110_0001; - - let public_key = create_public_key(); - - let mut forged_public_key = public_key.clone(); - // create a forged public key to force a PointDecompressionError - // https://docs.rs/ed25519-dalek/latest/src/ed25519_dalek/public.rs.html#142 - forged_public_key[31] = 0b1110_0001; + let signature_ptr = if signature_len == u64::MAX { + logic.wrapped_internal_write_register(1, &signature).unwrap(); + 1 + } else { + signature.as_ptr() as u64 + }; - // 32 bytes message - let message: [u8; 32] = [ - 107, 97, 106, 100, 108, 102, 107, 106, 97, 108, 107, 102, 106, 97, 107, 108, 102, 106, 100, - 107, 108, 97, 100, 106, 102, 107, 108, 106, 97, 100, 115, 107, - ]; + let message_ptr = if message_len == u64::MAX { + logic.wrapped_internal_write_register(2, &message).unwrap(); + 2 + } else { + message.as_ptr() as u64 + }; - let scenarios = [ - ( - signature.len(), - signature.clone(), - message.len(), - message.as_slice(), - public_key.len(), - public_key.clone(), - Ok(1), - map! { - ExtCosts::read_memory_byte: 128, - ExtCosts::read_memory_base: 3, - ExtCosts::ed25519_verify_base: 1, - ExtCosts::ed25519_verify_byte: 32, - }, - ), - ( - signature.len(), - signature.clone(), - message.len(), - message.as_slice(), - public_key.len(), - forged_public_key.clone(), - Ok(0), - map! 
{ - ExtCosts::read_memory_byte: 128, - ExtCosts::read_memory_base: 3, - ExtCosts::ed25519_verify_base: 1, - ExtCosts::ed25519_verify_byte: 32, - }, - ), - ( - signature.len(), - signature.clone(), - message.len(), - message.as_slice(), - public_key.len() - 1, - public_key.clone(), - Err(VMLogicError::HostError(HostError::Ed25519VerifyInvalidInput { - msg: "invalid public key length".to_string(), - })), - map! { - ExtCosts::read_memory_byte: 127, - ExtCosts::read_memory_base: 3, - ExtCosts::ed25519_verify_base: 1, - ExtCosts::ed25519_verify_byte: 32, - }, - ), - ( - bad_signature.len(), - bad_signature.clone(), - message.len(), - message.as_slice(), - public_key.len(), - public_key.clone(), - Ok(0), - map! { - ExtCosts::read_memory_byte: 128, - ExtCosts::read_memory_base: 3, - ExtCosts::ed25519_verify_base: 1, - ExtCosts::ed25519_verify_byte: 32, - }, - ), - ( - signature.len() - 1, - signature.clone(), - message.len(), - message.as_slice(), - public_key.len(), - public_key.clone(), - Err(VMLogicError::HostError(HostError::Ed25519VerifyInvalidInput { - msg: "invalid signature length".to_string(), - })), - map! { - ExtCosts::read_memory_base: 1, - ExtCosts::read_memory_byte: 63, - ExtCosts::ed25519_verify_base: 1, - }, - ), - ( - forged_signature.len(), - forged_signature.clone(), - message.len(), - message.as_slice(), - public_key.len(), - public_key.clone(), - Ok(0), - map! { - ExtCosts::read_memory_base: 1, - ExtCosts::read_memory_byte: 64, - ExtCosts::ed25519_verify_base: 1, - }, - ), - ( - forged_signature.len(), - forged_signature.clone(), - 0, - message.as_slice(), - public_key.len(), - public_key.clone(), - Ok(0), - map! 
{ - ExtCosts::read_memory_base: 1, - ExtCosts::read_memory_byte: 64, - ExtCosts::ed25519_verify_base: 1, - }, - ), - ]; + let public_key_ptr = if public_key_len == u64::MAX { + logic.wrapped_internal_write_register(3, &public_key).unwrap(); + 3 + } else { + public_key.as_ptr() as u64 + }; - for ( + let result = logic.ed25519_verify( signature_len, - signature, + signature_ptr, message_len, - message, + message_ptr, public_key_len, - public_key, - expected_result, - want_costs, - ) in scenarios - { - check_ed25519_verify( - &mut logic, - signature_len as _, - signature.as_ref(), - message_len as _, - message.as_ref(), - public_key_len as _, - public_key.as_ref(), - expected_result, - want_costs, - ); - } + public_key_ptr, + ); + + let want = want.map_err(VMLogicError::HostError); + assert_eq!(want, result); + assert_costs(want_costs); } #[test] -fn test_ed25519_verify_check_registers() { - let mut logic_builder = VMLogicBuilder::default(); - let mut logic = logic_builder.build(get_context(vec![], false)); - - let signature = create_signature(); - let public_key = create_public_key(); - - let bad_signature: [u8; 64] = [1; 64]; - - // 32 bytes message - let message: [u8; 32] = [ - 107, 97, 106, 100, 108, 102, 107, 106, 97, 108, 107, 102, 106, 97, 107, 108, 102, 106, 100, - 107, 108, 97, 100, 106, 102, 107, 108, 106, 97, 100, 115, 107, - ]; - - let mut forged_signature = signature.clone(); - // create a forged signature with the `s` scalar not properly reduced - // https://docs.rs/ed25519/latest/src/ed25519/lib.rs.html#302 - forged_signature[63] = 0b1110_0001; - - // tests for data beingn read from registers - logic.wrapped_internal_write_register(1, &signature).unwrap(); - let result = logic.ed25519_verify( - u64::MAX, - 1 as _, - message.len() as _, - message.as_ptr() as _, - public_key.len() as _, - public_key.as_ptr() as _, +fn test_ed25519_verify_behavior_and_errors() { + check_ed25519_verify( + SIGNATURE.len() as u64, + &SIGNATURE, + MESSAGE.len() as u64, + 
&MESSAGE, + PUBLIC_KEY.len() as u64, + &PUBLIC_KEY, + Ok(1), + map! { + ExtCosts::read_memory_byte: 128, + ExtCosts::read_memory_base: 3, + ExtCosts::ed25519_verify_base: 1, + ExtCosts::ed25519_verify_byte: 32, + }, + ); + check_ed25519_verify( + SIGNATURE.len() as u64, + &SIGNATURE, + MESSAGE.len() as u64, + &MESSAGE, + PUBLIC_KEY.len() as u64, + &FORGED_PUBLIC_KEY, + Ok(0), + map! { + ExtCosts::read_memory_byte: 128, + ExtCosts::read_memory_base: 3, + ExtCosts::ed25519_verify_base: 1, + ExtCosts::ed25519_verify_byte: 32, + }, + ); + check_ed25519_verify( + SIGNATURE.len() as u64, + &SIGNATURE, + MESSAGE.len() as u64, + &MESSAGE, + PUBLIC_KEY.len() as u64 - 1, + &PUBLIC_KEY, + Err(HostError::Ed25519VerifyInvalidInput { msg: "invalid public key length".to_string() }), + map! { + ExtCosts::read_memory_byte: 127, + ExtCosts::read_memory_base: 3, + ExtCosts::ed25519_verify_base: 1, + ExtCosts::ed25519_verify_byte: 32, + }, + ); + check_ed25519_verify( + BAD_SIGNATURE.len() as u64, + &BAD_SIGNATURE, + MESSAGE.len() as u64, + &MESSAGE, + PUBLIC_KEY.len() as u64, + &PUBLIC_KEY, + Ok(0), + map! { + ExtCosts::read_memory_byte: 128, + ExtCosts::read_memory_base: 3, + ExtCosts::ed25519_verify_base: 1, + ExtCosts::ed25519_verify_byte: 32, + }, ); - assert_eq!(Ok(1u64), result); + check_ed25519_verify( + SIGNATURE.len() as u64 - 1, + &SIGNATURE, + MESSAGE.len() as u64, + &MESSAGE, + PUBLIC_KEY.len() as u64, + &PUBLIC_KEY, + Err(HostError::Ed25519VerifyInvalidInput { msg: "invalid signature length".to_string() }), + map! { + ExtCosts::read_memory_base: 1, + ExtCosts::read_memory_byte: 63, + ExtCosts::ed25519_verify_base: 1, + }, + ); + check_ed25519_verify( + FORGED_SIGNATURE.len() as u64, + &FORGED_SIGNATURE, + MESSAGE.len() as u64, + &MESSAGE, + PUBLIC_KEY.len() as u64, + &PUBLIC_KEY, + Ok(0), + map! 
{ + ExtCosts::read_memory_base: 1, + ExtCosts::read_memory_byte: 64, + ExtCosts::ed25519_verify_base: 1, + }, + ); + check_ed25519_verify( + FORGED_SIGNATURE.len() as u64, + &FORGED_SIGNATURE, + 0, + &[], + PUBLIC_KEY.len() as u64, + &PUBLIC_KEY, + Ok(0), + map! { + ExtCosts::read_memory_base: 1, + ExtCosts::read_memory_byte: 64, + ExtCosts::ed25519_verify_base: 1, + }, + ); + check_ed25519_verify( + SIGNATURE.len() as u64, + &SIGNATURE, + 0, + &[], + PUBLIC_KEY.len() as u64, + &PUBLIC_KEY, + Ok(0), + map! { + ExtCosts::read_memory_base: 3, + ExtCosts::read_memory_byte: 96, + ExtCosts::ed25519_verify_base: 1, + ExtCosts::ed25519_verify_byte: 0, + }, + ); +} - logic.wrapped_internal_write_register(1, &bad_signature).unwrap(); - let result = logic.ed25519_verify( +// tests for data being read from registers +#[test] +fn test_ed25519_verify_check_registers() { + check_ed25519_verify( u64::MAX, - 1 as _, - message.len() as _, - message.as_ptr() as _, - public_key.len() as _, - public_key.as_ptr() as _, + &SIGNATURE, + MESSAGE.len() as u64, + &MESSAGE, + PUBLIC_KEY.len() as u64, + &PUBLIC_KEY, + Ok(1), + map! { + ExtCosts::write_register_base: 1, + ExtCosts::write_register_byte: 64, + + ExtCosts::read_register_base: 1, + ExtCosts::read_register_byte: 64, + ExtCosts::read_memory_base: 2, + ExtCosts::read_memory_byte: 64, + ExtCosts::ed25519_verify_base: 1, + ExtCosts::ed25519_verify_byte: 32, + }, ); - assert_eq!(Ok(0), result); + check_ed25519_verify( + SIGNATURE.len() as u64, + &SIGNATURE, + u64::MAX, + &MESSAGE, + PUBLIC_KEY.len() as u64, + &PUBLIC_KEY, + Ok(1), + map! 
{ + ExtCosts::write_register_base: 1, + ExtCosts::write_register_byte: 32, - logic.wrapped_internal_write_register(1, &forged_signature).unwrap(); - let result = logic.ed25519_verify( + ExtCosts::read_register_base: 1, + ExtCosts::read_register_byte: 32, + ExtCosts::read_memory_base: 2, + ExtCosts::read_memory_byte: 96, + ExtCosts::ed25519_verify_base: 1, + ExtCosts::ed25519_verify_byte: 32, + }, + ); + check_ed25519_verify( + SIGNATURE.len() as u64, + &SIGNATURE, + MESSAGE.len() as u64, + &MESSAGE, u64::MAX, - 1 as _, - message.len() as _, - message.as_ptr() as _, - public_key.len() as _, - public_key.as_ptr() as _, + &PUBLIC_KEY, + Ok(1), + map! { + ExtCosts::write_register_base: 1, + ExtCosts::write_register_byte: 32, + + ExtCosts::read_register_byte: 32, + ExtCosts::read_register_base: 1, + ExtCosts::read_memory_base: 2, + ExtCosts::read_memory_byte: 96, + ExtCosts::ed25519_verify_base: 1, + ExtCosts::ed25519_verify_byte: 32, + }, ); - assert_eq!(Ok(0), result); + check_ed25519_verify( + u64::MAX, + &BAD_SIGNATURE, + MESSAGE.len() as u64, + &MESSAGE, + PUBLIC_KEY.len() as u64, + &PUBLIC_KEY, + Ok(0), + map! { + ExtCosts::write_register_base: 1, + ExtCosts::write_register_byte: 64, - logic.wrapped_internal_write_register(1, &message).unwrap(); - let result = logic.ed25519_verify( - signature.len() as _, - signature.as_ptr() as _, + ExtCosts::read_register_base: 1, + ExtCosts::read_register_byte: 64, + ExtCosts::read_memory_base: 2, + ExtCosts::read_memory_byte: 64, + ExtCosts::ed25519_verify_byte: 32, + ExtCosts::ed25519_verify_base: 1, + }, + ); + check_ed25519_verify( u64::MAX, - 1, - public_key.len() as _, - public_key.as_ptr() as _, + &FORGED_SIGNATURE, + MESSAGE.len() as u64, + &MESSAGE, + PUBLIC_KEY.len() as u64, + &PUBLIC_KEY, + Ok(0), + map! 
{ + ExtCosts::write_register_base: 1, + ExtCosts::write_register_byte: 64, + + ExtCosts::read_register_base: 1, + ExtCosts::read_register_byte: 64, + ExtCosts::ed25519_verify_base: 1, + }, ); - assert_eq!(Ok(1), result); + check_ed25519_verify( + u64::MAX, + &[0], + MESSAGE.len() as u64, + &MESSAGE, + PUBLIC_KEY.len() as u64, + &PUBLIC_KEY, + Err(HostError::Ed25519VerifyInvalidInput { msg: "invalid signature length".to_string() }), + map! { + ExtCosts::write_register_base: 1, + ExtCosts::write_register_byte: 1, - logic.wrapped_internal_write_register(1, &public_key).unwrap(); - let result = logic.ed25519_verify( - signature.len() as _, - signature.as_ptr() as _, - message.len() as _, - message.as_ptr() as _, + ExtCosts::read_register_base: 1, + ExtCosts::read_register_byte: 1, + ExtCosts::ed25519_verify_base: 1, + }, + ); + check_ed25519_verify( + SIGNATURE.len() as u64, + &SIGNATURE, + MESSAGE.len() as u64, + &MESSAGE, u64::MAX, - 1, + &[0], + Err(HostError::Ed25519VerifyInvalidInput { msg: "invalid public key length".to_string() }), + map! { + ExtCosts::write_register_base: 1, + ExtCosts::write_register_byte: 1, + + ExtCosts::read_register_base: 1, + ExtCosts::read_register_byte: 1, + ExtCosts::read_memory_base: 2, + ExtCosts::read_memory_byte: 96, + ExtCosts::ed25519_verify_byte: 32, + ExtCosts::ed25519_verify_base: 1, + }, ); - assert_eq!(Ok(1), result); } From 501c4dbc0949df7988979d2a99f22e088b53ac37 Mon Sep 17 00:00:00 2001 From: mm-near <91919554+mm-near@users.noreply.github.com> Date: Mon, 31 Oct 2022 13:10:47 +0100 Subject: [PATCH 054/103] Speedy-sync - experimental tool for faster syncing (#6914) See README.md for details. 
It can be used as a way to sync a lot faster (before we have Epoch Sync enabled) --- Cargo.lock | 17 ++ Cargo.toml | 1 + chain/epoch-manager/src/lib.rs | 2 +- core/primitives/src/epoch_manager.rs | 24 ++- core/primitives/src/merkle.rs | 2 +- tools/speedy_sync/Cargo.toml | 24 +++ tools/speedy_sync/README.md | 62 ++++++ tools/speedy_sync/src/main.rs | 276 +++++++++++++++++++++++++++ 8 files changed, 398 insertions(+), 10 deletions(-) create mode 100644 tools/speedy_sync/Cargo.toml create mode 100644 tools/speedy_sync/README.md create mode 100644 tools/speedy_sync/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 52ed97cee5c..486ae803f68 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5368,6 +5368,23 @@ dependencies = [ "winapi", ] +[[package]] +name = "speedy_sync" +version = "0.0.0" +dependencies = [ + "borsh", + "clap 3.1.18", + "near-chain", + "near-chain-configs", + "near-chain-primitives", + "near-epoch-manager", + "near-primitives", + "near-store", + "nearcore", + "serde", + "serde_json", +] + [[package]] name = "spin" version = "0.5.2" diff --git a/Cargo.toml b/Cargo.toml index ac5592fca3a..20ba0360605 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,6 +51,7 @@ members = [ "tools/rpctypegen/core", "tools/rpctypegen/macro", "tools/state-viewer", + "tools/speedy_sync", "tools/storage-usage-delta-calculator", "tools/themis", "utils/mainnet-res", diff --git a/chain/epoch-manager/src/lib.rs b/chain/epoch-manager/src/lib.rs index 9c5fa0e2930..23c696e7c28 100644 --- a/chain/epoch-manager/src/lib.rs +++ b/chain/epoch-manager/src/lib.rs @@ -41,7 +41,7 @@ mod shard_assignment; pub mod test_utils; #[cfg(test)] mod tests; -mod types; +pub mod types; mod validator_selection; const EPOCH_CACHE_SIZE: usize = if cfg!(feature = "no_cache") { 1 } else { 50 }; diff --git a/core/primitives/src/epoch_manager.rs b/core/primitives/src/epoch_manager.rs index 4c9fc3108ae..605b66d1677 100644 --- a/core/primitives/src/epoch_manager.rs +++ b/core/primitives/src/epoch_manager.rs @@ 
-153,12 +153,13 @@ pub mod block_info { use borsh::{BorshDeserialize, BorshSerialize}; use near_primitives_core::hash::CryptoHash; use near_primitives_core::types::{AccountId, Balance, BlockHeight, ProtocolVersion}; + use serde::Serialize; use std::collections::HashMap; pub use super::BlockInfoV1; /// Information per each block. - #[derive(BorshSerialize, BorshDeserialize, Eq, PartialEq, Clone, Debug)] + #[derive(BorshSerialize, BorshDeserialize, Eq, PartialEq, Clone, Debug, Serialize)] pub enum BlockInfo { V1(BlockInfoV1), V2(BlockInfoV2), @@ -341,7 +342,7 @@ pub mod block_info { } // V1 -> V2: Use versioned ValidatorStake structure in proposals - #[derive(Default, BorshSerialize, BorshDeserialize, Eq, PartialEq, Clone, Debug)] + #[derive(Default, BorshSerialize, BorshDeserialize, Eq, PartialEq, Clone, Debug, Serialize)] pub struct BlockInfoV2 { pub hash: CryptoHash, pub height: BlockHeight, @@ -363,7 +364,7 @@ pub mod block_info { } /// Information per each block. -#[derive(Default, BorshSerialize, BorshDeserialize, Eq, PartialEq, Clone, Debug)] +#[derive(Default, BorshSerialize, BorshDeserialize, Eq, PartialEq, Clone, Debug, Serialize)] pub struct BlockInfoV1 { pub hash: CryptoHash, pub height: BlockHeight, @@ -422,7 +423,7 @@ impl BlockInfoV1 { } } -#[derive(Default, BorshSerialize, BorshDeserialize, Clone, Debug, PartialEq, Eq)] +#[derive(Default, BorshSerialize, BorshDeserialize, Clone, Debug, PartialEq, Eq, Serialize)] pub struct ValidatorWeight(ValidatorId, u64); pub mod epoch_info { @@ -435,6 +436,7 @@ pub mod epoch_info { use near_primitives_core::types::{ AccountId, Balance, EpochHeight, ProtocolVersion, ValidatorId, }; + use serde::Serialize; use smart_default::SmartDefault; use std::collections::{BTreeMap, HashMap}; @@ -447,7 +449,7 @@ pub mod epoch_info { pub use super::EpochInfoV1; /// Information per epoch. 
- #[derive(BorshSerialize, BorshDeserialize, Clone, Debug, PartialEq, Eq)] + #[derive(BorshSerialize, BorshDeserialize, Clone, Debug, PartialEq, Eq, Serialize)] pub enum EpochInfo { V1(EpochInfoV1), V2(EpochInfoV2), @@ -461,7 +463,9 @@ pub mod epoch_info { } // V1 -> V2: Use versioned ValidatorStake structure in validators and fishermen - #[derive(SmartDefault, BorshSerialize, BorshDeserialize, Clone, Debug, PartialEq, Eq)] + #[derive( + SmartDefault, BorshSerialize, BorshDeserialize, Clone, Debug, PartialEq, Eq, Serialize, + )] pub struct EpochInfoV2 { /// Ordinal of given epoch from genesis. /// There can be multiple epochs with the same ordinal in case of long forks. @@ -497,7 +501,9 @@ pub mod epoch_info { // V2 -> V3: Structures for randomly selecting validators at each height based on new // block producer and chunk producer selection algorithm. - #[derive(SmartDefault, BorshSerialize, BorshDeserialize, Clone, Debug, PartialEq, Eq)] + #[derive( + SmartDefault, BorshSerialize, BorshDeserialize, Clone, Debug, PartialEq, Eq, Serialize, + )] pub struct EpochInfoV3 { pub epoch_height: EpochHeight, pub validators: Vec, @@ -879,7 +885,9 @@ pub mod epoch_info { } /// Information per epoch. -#[derive(SmartDefault, BorshSerialize, BorshDeserialize, Clone, Debug, PartialEq, Eq)] +#[derive( + SmartDefault, BorshSerialize, BorshDeserialize, Clone, Debug, PartialEq, Eq, Serialize, +)] pub struct EpochInfoV1 { /// Ordinal of given epoch from genesis. /// There can be multiple epochs with the same ordinal in case of long forks. diff --git a/core/primitives/src/merkle.rs b/core/primitives/src/merkle.rs index 5d0283fc8f8..37338c156d9 100644 --- a/core/primitives/src/merkle.rs +++ b/core/primitives/src/merkle.rs @@ -124,7 +124,7 @@ pub fn compute_root_from_path_and_item( /// The root can be computed by folding `path` from right but is not explicitly /// maintained to save space. /// The size of the object is O(log(n)) where n is the number of leaves in the tree, i.e, `size`. 
-#[derive(Default, Clone, BorshSerialize, BorshDeserialize, Eq, PartialEq, Debug)] +#[derive(Default, Clone, BorshSerialize, BorshDeserialize, Eq, PartialEq, Debug, Serialize)] pub struct PartialMerkleTree { /// Path for the next leaf. path: Vec, diff --git a/tools/speedy_sync/Cargo.toml b/tools/speedy_sync/Cargo.toml new file mode 100644 index 00000000000..729e1cd1ed9 --- /dev/null +++ b/tools/speedy_sync/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "speedy_sync" +version = "0.0.0" +authors.workspace = true +publish = false +rust-version.workspace = true +edition.workspace = true + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +near-store = { path = "../../core/store" } +near-chain-primitives = { path = "../../chain/chain-primitives" } +near-primitives = { path = "../../core/primitives" } +nearcore = { path = "../../nearcore" } +near-chain-configs = { path = "../../core/chain-configs" } +near-chain = { path = "../../chain/chain"} +near-epoch-manager = {path = "../../chain/epoch-manager" } + +borsh = "0.9" +serde = { version = "1.0.137", features = ["derive"] } +serde_json = "1.0.81" +clap = { version = "3.1.6", features = ["derive"] } + diff --git a/tools/speedy_sync/README.md b/tools/speedy_sync/README.md new file mode 100644 index 00000000000..a33efc14235 --- /dev/null +++ b/tools/speedy_sync/README.md @@ -0,0 +1,62 @@ +# Speedy sync (a.k.a PoorMan's EpochSync) + +The goal of the speedy sync is to allow people to catch up quickly with mainnet, before we have fully implemented the EpochSync feature. + +Currently, in order to catch up with mainnet there are two possible options: +* download a DB backup that Pagoda provides (around 200GB) +* sync from scratch - which can take a couple of days. + +With SpeedySync, you're able to catch up with mainnet in around 2-3 hours. + +# How does it work?
+ +With regular sync, your job needs to download all the headers from the genesis block until now (so around 60 million headers - as of May 2022). + +This of course will take a lot of time (possibly even days). The real fix will come once we finish building EpochSync - which would require the system to load only a single block per epoch (and would therefore reduce the number of blocks needed by a factor of 40k - to around 12k blocks). + +But as EpochSync is not there yet, you can use SpeedySync in the meantime. + +SpeedySync uses a small checkpoint (around 50kb) that contains the necessary information about the state of the chain at a given epoch. Therefore your job can continue syncing from that moment, rather than directly from genesis. + +## Is it safe? + +Yes, but with a small caveat: if someone provides you with a fake checkpoint, your future block hashes will not match. That is why **you should verify the block headers after your job is synced, to make sure that they match other blocks on the mainnet**. + + +# How do I use it? + + +## Creating a checkpoint +To create a checkpoint, please run: + +``` +cargo build -p speedy_sync + +./speedy_sync create --home $PATH_TO_RUNNING_NEAR_NODE --destination-dir $PATH_TO_PLACE_WHERE_TO_PUT_CHECKPOINT +``` + +## Loading a checkpoint +If your new HOME dir doesn't have a node_key.json file, you can generate a random one using: +``` +cargo run -p keypair-generator -- --home /tmp/bar --generate-config node-key +``` + + +To load a checkpoint, please run: +``` +cargo build -p speedy_sync +./speedy_sync load --source-dir $PATH_TO_CHECKPOINT_DIR --target-home $PATH_TO_HOME_DIR_OF_A_NEW_NODE +``` + + +### After running speedy + +**Important:** After running the 'load' command, you must still copy the 'node_key.json' file into that directory, before running neard. + +Please also check and verify the config.json file.
+ +Afterwards you can start the neard with the new homedir and let it sync: + +``` +./neard --home $PATH_TO_HOME_DIR_OF_A_NEW_NODE +``` diff --git a/tools/speedy_sync/src/main.rs b/tools/speedy_sync/src/main.rs new file mode 100644 index 00000000000..9e9d256046d --- /dev/null +++ b/tools/speedy_sync/src/main.rs @@ -0,0 +1,276 @@ +use std::fs; +use std::path::Path; +use std::sync::Arc; + +use borsh::{BorshDeserialize, BorshSerialize}; +use clap::Parser; +use near_chain::types::Tip; +use near_chain::{Chain, ChainGenesis, DoomslugThresholdMode}; +use near_chain_configs::GenesisValidationMode; +use near_epoch_manager::types::EpochInfoAggregator; +use near_primitives::block::Block; +use near_primitives::block_header::BlockHeader; +use near_primitives::epoch_manager::block_info::BlockInfo; +use near_primitives::epoch_manager::epoch_info::EpochInfo; +use near_primitives::epoch_manager::AGGREGATOR_KEY; +use near_primitives::hash::CryptoHash; +use near_primitives::merkle::PartialMerkleTree; +use near_primitives::types::EpochId; +use near_primitives::utils::index_to_bytes; +use near_store::HEADER_HEAD_KEY; +use near_store::{DBCol, Mode, NodeStorage, Store, StoreUpdate}; + +use nearcore::NightshadeRuntime; +use serde::Serialize; + +#[derive(Serialize, BorshSerialize, BorshDeserialize)] +pub struct BlockCheckpoint { + pub header: BlockHeader, + pub info: BlockInfo, + pub merkle_tree: PartialMerkleTree, +} + +#[derive(Serialize, BorshSerialize, BorshDeserialize)] +pub struct EpochCheckpoint { + pub id: EpochId, + pub info: EpochInfo, +} + +#[derive(Serialize, BorshSerialize, BorshDeserialize)] +pub struct SpeedyCheckpoint { + pub prev_epoch: EpochCheckpoint, + pub current_epoch: EpochCheckpoint, + pub next_epoch: EpochCheckpoint, + + pub block: BlockCheckpoint, + pub prev_block: BlockCheckpoint, + pub final_block: BlockCheckpoint, + pub first_block: BlockCheckpoint, +} + +#[derive(Parser)] +pub struct CreateCmd { + #[clap(long)] + home: String, + + #[clap(long)] + 
destination_dir: String, +} + +#[derive(Parser)] +pub struct LoadCmd { + #[clap(long)] + target_home: String, + + #[clap(long)] + source_dir: String, +} + +#[derive(Parser)] +enum CliSubcmd { + Create(CreateCmd), + Load(LoadCmd), +} + +#[derive(Parser)] +#[clap(subcommand_required = true, arg_required_else_help = true)] +struct Cli { + #[clap(subcommand)] + subcmd: CliSubcmd, +} + +fn read_block_checkpoint(store: &Store, block_hash: &CryptoHash) -> BlockCheckpoint { + let block: Block = store + .get_ser(DBCol::Block, block_hash.as_ref()) + .expect(format!("DB error Block {:?}", block_hash).as_str()) + .expect(format!("Key missing Block {}", block_hash).as_str()); + + let info: BlockInfo = store + .get_ser(DBCol::BlockInfo, block_hash.as_ref()) + .expect(format!("DB error BlockInfo {:?}", block_hash).as_str()) + .expect(format!("Key missing BlockInfo {}", block_hash).as_str()); + + let merkle_tree: PartialMerkleTree = store + .get_ser(DBCol::BlockMerkleTree, block_hash.as_ref()) + .expect(format!("DB error BlockMerkleTree {:?}", block_hash).as_str()) + .expect(format!("Key missing BlockMerkleTree {}", block_hash).as_str()); + + BlockCheckpoint { header: block.header().clone(), info, merkle_tree } +} + +fn write_block_checkpoint(store_update: &mut StoreUpdate, block_checkpoint: &BlockCheckpoint) { + let hash = block_checkpoint.header.hash(); + store_update + .set_ser(DBCol::BlockHeader, hash.as_ref(), &block_checkpoint.header) + .expect("Failed writing a header"); + + store_update + .insert_ser(DBCol::BlockInfo, hash.as_ref(), &block_checkpoint.info) + .expect("Failed writing a block info"); + + store_update + .set_ser(DBCol::BlockMerkleTree, hash.as_ref(), &block_checkpoint.merkle_tree) + .expect("Failed writing merkle tree"); + store_update + .set_ser( + DBCol::BlockHeight, + &index_to_bytes(block_checkpoint.header.height()), + block_checkpoint.header.hash(), + ) + .unwrap(); +} + +fn write_epoch_checkpoint(store_update: &mut StoreUpdate, epoch_checkpoint: 
&EpochCheckpoint) { + store_update + .set_ser(DBCol::EpochInfo, epoch_checkpoint.id.as_ref(), &epoch_checkpoint.info) + .expect("Failed to write epoch info"); +} + +fn create_snapshot(create_cmd: CreateCmd) { + let path = Path::new(&create_cmd.home); + let store = NodeStorage::opener(path, &Default::default(), None) + .open_in_mode(Mode::ReadOnly) + .unwrap() + .get_store(near_store::Temperature::Hot); + + // Get epoch information: + let mut epochs = store + .iter(DBCol::EpochInfo) + .filter_map(|result| { + if let Ok((key, value)) = result { + if key.as_ref() == AGGREGATOR_KEY { + None + } else { + let info = EpochInfo::try_from_slice(value.as_ref()).unwrap(); + let id = EpochId::try_from_slice(key.as_ref()).unwrap(); + Some(EpochCheckpoint { id, info }) + } + } else { + None + } + }) + .collect::>(); + + assert!(epochs.len() > 4, "Number of epochs must be greater than 4."); + + epochs.sort_by(|a, b| a.info.epoch_height().partial_cmp(&b.info.epoch_height()).unwrap()); + // Take the last three epochs (next, current, prev) + let next_epoch = epochs.pop().unwrap(); + let current_epoch = epochs.pop().unwrap(); + let prev_epoch = epochs.pop().unwrap(); + + // We need information about 4 blocks to start the chain: + // + // 'block' - we'll always pick the last block of a given epoch. + // 'prev_block' - its predecessor + // 'final_block' - the block with finality (usually 2 blocks behind) + // 'first_block' - the first block of this epoch (usually epoch_length behind).
+ + let block_hash = next_epoch.id.0; + let block = read_block_checkpoint(&store, &block_hash); + let block_header = block.header.clone(); + let prev_block = read_block_checkpoint(&store, block_header.prev_hash()); + let final_block = read_block_checkpoint(&store, block_header.last_final_block()); + let first_block = read_block_checkpoint(&store, block.info.epoch_first_block()); + + let checkpoint = SpeedyCheckpoint { + prev_epoch, + current_epoch, + next_epoch, + block, + prev_block, + final_block, + first_block, + }; + + let serialized = serde_json::to_string(&checkpoint).unwrap(); + + fs::write(Path::new(&create_cmd.destination_dir).join("snapshot.json"), serialized) + .expect("Failed writing to destination file"); + + fs::write( + Path::new(&create_cmd.destination_dir).join("snapshot.borsh"), + checkpoint.try_to_vec().unwrap(), + ) + .expect("Failed writing to destination file"); + + fs::copy( + Path::new(&create_cmd.home).join("genesis.json"), + Path::new(&create_cmd.destination_dir).join("genesis.json"), + ) + .unwrap(); + fs::copy( + Path::new(&create_cmd.home).join("config.json"), + Path::new(&create_cmd.destination_dir).join("config.json"), + ) + .unwrap(); +} + +fn load_snapshot(load_cmd: LoadCmd) { + let data = fs::read(Path::new(&load_cmd.source_dir).join("snapshot.borsh")) + .expect("Failed reading snapshot.borsh"); + + let snapshot: SpeedyCheckpoint = SpeedyCheckpoint::try_from_slice(data.as_ref()).unwrap(); + + let home_dir = Path::new(&load_cmd.target_home); + fs::copy( + Path::new(&load_cmd.source_dir).join("genesis.json"), + Path::new(&load_cmd.target_home).join("genesis.json"), + ) + .unwrap(); + fs::copy( + Path::new(&load_cmd.source_dir).join("config.json"), + Path::new(&load_cmd.target_home).join("config.json"), + ) + .unwrap(); + + let config = nearcore::config::load_config(&home_dir, GenesisValidationMode::UnsafeFast) + .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); + let store = NodeStorage::opener(home_dir, 
&Default::default(), None) + .open() + .unwrap() + .get_store(near_store::Temperature::Hot); + let chain_genesis = ChainGenesis::new(&config.genesis); + let runtime = Arc::new(NightshadeRuntime::from_config(home_dir, store.clone(), &config)); + // This will initialize the database (add genesis block etc) + let _chain = Chain::new( + runtime.clone(), + &chain_genesis, + DoomslugThresholdMode::TwoThirds, + !config.client_config.archive, + ) + .unwrap(); + + let mut store_update = store.store_update(); + // Store epoch information. + write_epoch_checkpoint(&mut store_update, &snapshot.current_epoch); + write_epoch_checkpoint(&mut store_update, &snapshot.prev_epoch); + write_epoch_checkpoint(&mut store_update, &snapshot.next_epoch); + + // Store blocks. + write_block_checkpoint(&mut store_update, &snapshot.block); + write_block_checkpoint(&mut store_update, &snapshot.prev_block); + write_block_checkpoint(&mut store_update, &snapshot.final_block); + write_block_checkpoint(&mut store_update, &snapshot.first_block); + + // Store the HEADER_KEY (used in header sync). + store_update + .set_ser(DBCol::BlockMisc, HEADER_HEAD_KEY, &Tip::from_header(&snapshot.block.header)) + .unwrap(); + + // TODO: confirm if this aggregator can be empty. + // If not - we'll have to compute one and put it in the checkpoint. 
+ let aggregator = + EpochInfoAggregator::new(snapshot.prev_epoch.id, *snapshot.final_block.header.hash()); + store_update.set_ser(DBCol::EpochInfo, AGGREGATOR_KEY, &aggregator).unwrap(); + store_update.commit().unwrap(); +} + +fn main() { + let args = Cli::parse(); + + match args.subcmd { + CliSubcmd::Create(create_cmd) => create_snapshot(create_cmd), + CliSubcmd::Load(load_cmd) => load_snapshot(load_cmd), + } +} From aa20bc52a961ae1eafa1c31ef62bba5b1fba9eb1 Mon Sep 17 00:00:00 2001 From: mm-near <91919554+mm-near@users.noreply.github.com> Date: Mon, 31 Oct 2022 14:02:22 +0100 Subject: [PATCH 055/103] fixed compilation error in store_bench (#7963) Co-authored-by: near-bulldozer[bot] <73298989+near-bulldozer[bot]@users.noreply.github.com> --- core/store/benches/store_bench.rs | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/core/store/benches/store_bench.rs b/core/store/benches/store_bench.rs index 0368b82c5e5..0327e4740c4 100644 --- a/core/store/benches/store_bench.rs +++ b/core/store/benches/store_bench.rs @@ -19,15 +19,10 @@ fn benchmark_write_then_read_successful( let tmp_dir = tempfile::tempdir().unwrap(); // Use default StoreConfig rather than NodeStorage::test_opener so we’re using the // same configuration as in production. 
- let store = NodeStorage::opener( - tmp_dir.path(), - &Default::default(), - #[cfg(feature = "cold_store")] - None, - ) - .open() - .unwrap() - .get_store(Temperature::Hot); + let store = NodeStorage::opener(tmp_dir.path(), &Default::default(), None) + .open() + .unwrap() + .get_store(Temperature::Hot); let keys = generate_keys(num_keys, key_size); write_to_db(&store, &keys, max_value_size, col); From cc908cd7502a48b184e2f6edee65e895981f1ee2 Mon Sep 17 00:00:00 2001 From: mm-near <91919554+mm-near@users.noreply.github.com> Date: Mon, 31 Oct 2022 14:15:13 +0100 Subject: [PATCH 056/103] [DEBUG] move chain processing status to a separate API endpoint (#7962) As the block status can get quite heavy when network is behind. --- chain/client-primitives/src/debug.rs | 8 +- chain/client/src/client_actor.rs | 1 - chain/client/src/debug.rs | 3 + chain/jsonrpc-primitives/src/types/status.rs | 5 +- chain/jsonrpc/res/chain_n_chunk_info.html | 126 +++++++++++-------- chain/jsonrpc/src/api/status.rs | 5 + chain/jsonrpc/src/lib.rs | 3 + core/primitives/src/views.rs | 1 - 8 files changed, 95 insertions(+), 57 deletions(-) diff --git a/chain/client-primitives/src/debug.rs b/chain/client-primitives/src/debug.rs index 13cda11270e..090d13bf594 100644 --- a/chain/client-primitives/src/debug.rs +++ b/chain/client-primitives/src/debug.rs @@ -6,7 +6,9 @@ use std::collections::HashMap; use crate::types::StatusError; use actix::Message; use chrono::DateTime; -use near_primitives::views::{CatchupStatusView, EpochValidatorInfo, SyncStatusView}; +use near_primitives::views::{ + CatchupStatusView, ChainProcessingInfo, EpochValidatorInfo, SyncStatusView, +}; use near_primitives::{ block_header::ApprovalInner, hash::CryptoHash, @@ -178,6 +180,8 @@ pub enum DebugStatus { ValidatorStatus, // Request for the current catchup status CatchupStatus, + // Request for the current state of chain processing (blocks in progress etc). 
+ ChainProcessingStatus, } impl Message for DebugStatus { @@ -195,4 +199,6 @@ pub enum DebugStatusResponse { BlockStatus(DebugBlockStatusData), // Detailed information about the validator (approvals, block & chunk production etc.) ValidatorStatus(ValidatorStatus), + // Detailed information about chain processing (blocks in progress etc). + ChainProcessingStatus(ChainProcessingInfo), } diff --git a/chain/client/src/client_actor.rs b/chain/client/src/client_actor.rs index 9819310e856..5f2887c21c0 100644 --- a/chain/client/src/client_actor.rs +++ b/chain/client/src/client_actor.rs @@ -845,7 +845,6 @@ impl Handler> for ClientActor { .config .min_block_production_delay .as_millis() as u64, - chain_processing_info: self.client.chain.get_chain_processing_info(), }) } else { None diff --git a/chain/client/src/debug.rs b/chain/client/src/debug.rs index 9f35aa3283f..54ae6e2b549 100644 --- a/chain/client/src/debug.rs +++ b/chain/client/src/debug.rs @@ -172,6 +172,9 @@ impl Handler> for ClientActor { DebugStatus::CatchupStatus => { Ok(DebugStatusResponse::CatchupStatus(self.client.get_catchup_status()?)) } + DebugStatus::ChainProcessingStatus => Ok(DebugStatusResponse::ChainProcessingStatus( + self.client.chain.get_chain_processing_info(), + )), } } } diff --git a/chain/jsonrpc-primitives/src/types/status.rs b/chain/jsonrpc-primitives/src/types/status.rs index d7c59c97c94..21b5e2f9940 100644 --- a/chain/jsonrpc-primitives/src/types/status.rs +++ b/chain/jsonrpc-primitives/src/types/status.rs @@ -1,7 +1,9 @@ use near_client_primitives::debug::{ DebugBlockStatusData, EpochInfoView, TrackedShardsView, ValidatorStatus, }; -use near_primitives::views::{CatchupStatusView, PeerStoreView, SyncStatusView}; +use near_primitives::views::{ + CatchupStatusView, ChainProcessingInfo, PeerStoreView, SyncStatusView, +}; use serde::{Deserialize, Serialize}; #[derive(Debug, Serialize, Deserialize)] @@ -22,6 +24,7 @@ pub enum DebugStatusResponse { // Detailed information about the validator 
(approvals, block & chunk production etc.) ValidatorStatus(ValidatorStatus), PeerStore(PeerStoreView), + ChainProcessingStatus(ChainProcessingInfo), } #[cfg(feature = "debug_types")] diff --git a/chain/jsonrpc/res/chain_n_chunk_info.html b/chain/jsonrpc/res/chain_n_chunk_info.html index 80a3121f546..bbb51c18c26 100644 --- a/chain/jsonrpc/res/chain_n_chunk_info.html +++ b/chain/jsonrpc/res/chain_n_chunk_info.html @@ -1,4 +1,4 @@ - +