Merge pull request #2468 from subspace/gemini-3g-backport-graceful-farmer-error-handling

Gemini 3g backport: graceful farmer error handling
nazar-pc committed Jan 29, 2024
2 parents 469193e + 2c33788 commit 8bbdfbd
Showing 10 changed files with 318 additions and 160 deletions.
3 changes: 2 additions & 1 deletion crates/pallet-subspace/src/mock.rs
@@ -482,7 +482,8 @@ pub fn create_signed_vote(
vote_solution_range,
&plotted_sector_bytes,
&plotted_sector.sector_metadata,
);
)
.unwrap();

let Some(audit_result) = maybe_audit_result else {
// Sector didn't have any solutions
38 changes: 22 additions & 16 deletions crates/subspace-farmer-components/benches/auditing.rs
@@ -151,14 +151,17 @@ pub fn criterion_benchmark(c: &mut Criterion) {
group.throughput(Throughput::Elements(1));
group.bench_function("memory/sync", |b| {
b.iter(|| async {
black_box(audit_plot_sync(
black_box(public_key),
black_box(global_challenge),
black_box(solution_range),
black_box(&plotted_sector_bytes),
black_box(slice::from_ref(&plotted_sector.sector_metadata)),
black_box(None),
));
black_box(
audit_plot_sync(
black_box(public_key),
black_box(global_challenge),
black_box(solution_range),
black_box(&plotted_sector_bytes),
black_box(slice::from_ref(&plotted_sector.sector_metadata)),
black_box(None),
)
.unwrap(),
);
})
});

@@ -193,14 +196,17 @@ pub fn criterion_benchmark(c: &mut Criterion) {
group.throughput(Throughput::Elements(sectors_count));
group.bench_function("disk/sync", |b| {
b.iter(|| {
black_box(audit_plot_sync(
black_box(public_key),
black_box(global_challenge),
black_box(solution_range),
black_box(&plot_file),
black_box(&sectors_metadata),
black_box(None),
));
black_box(
audit_plot_sync(
black_box(public_key),
black_box(global_challenge),
black_box(solution_range),
black_box(&plot_file),
black_box(&sectors_metadata),
black_box(None),
)
.unwrap(),
);
});
});

6 changes: 4 additions & 2 deletions crates/subspace-farmer-components/benches/proving.rs
@@ -166,7 +166,8 @@ pub fn criterion_benchmark(c: &mut Criterion) {
&plotted_sector_bytes,
slice::from_ref(&plotted_sector.sector_metadata),
None,
);
)
.unwrap();

let solution_candidates = match audit_results.into_iter().next() {
Some(audit_result) => audit_result.solution_candidates,
@@ -249,7 +250,8 @@ pub fn criterion_benchmark(c: &mut Criterion) {
&plot_file,
&sectors_metadata,
None,
);
)
.unwrap();
let solution_candidates = audit_results
.into_iter()
.map(|audit_result| audit_result.solution_candidates)
67 changes: 39 additions & 28 deletions crates/subspace-farmer-components/src/auditing.rs
@@ -2,12 +2,28 @@ use crate::proving::SolutionCandidates;
use crate::sector::{sector_size, SectorContentsMap, SectorMetadataChecksummed};
use crate::{ReadAtOffset, ReadAtSync};
use rayon::prelude::*;
use std::io;
use subspace_core_primitives::crypto::Scalar;
use subspace_core_primitives::{
Blake3Hash, PublicKey, SBucket, SectorId, SectorIndex, SectorSlotChallenge, SolutionRange,
};
use subspace_verification::is_within_solution_range;
use tracing::warn;
use thiserror::Error;

/// Errors that happen during auditing
#[derive(Debug, Error)]
pub enum AuditingError {
/// Failed to read s-bucket
#[error("Failed to read s-bucket {s_bucket_audit_index} of sector {sector_index}: {error}")]
SBucketReading {
/// Sector index
sector_index: SectorIndex,
/// S-bucket audit index
s_bucket_audit_index: SBucket,
/// Low-level error
error: io::Error,
},
}

/// Result of sector audit
#[derive(Debug, Clone)]
@@ -42,7 +58,7 @@ pub fn audit_sector_sync<'a, Sector>(
solution_range: SolutionRange,
sector: Sector,
sector_metadata: &'a SectorMetadataChecksummed,
) -> Option<AuditResult<'a, Sector>>
) -> Result<Option<AuditResult<'a, Sector>>, AuditingError>
where
Sector: ReadAtSync + 'a,
{
@@ -55,26 +71,24 @@ where
} = collect_sector_auditing_details(public_key.hash(), global_challenge, sector_metadata);

let mut s_bucket = vec![0; s_bucket_audit_size];
let read_s_bucket_result = sector.read_at(&mut s_bucket, s_bucket_audit_offset_in_sector);

if let Err(error) = read_s_bucket_result {
warn!(
%error,
sector_index = %sector_metadata.sector_index,
%s_bucket_audit_index,
"Failed read s-bucket",
);
return None;
}
sector
.read_at(&mut s_bucket, s_bucket_audit_offset_in_sector)
.map_err(|error| AuditingError::SBucketReading {
sector_index: sector_metadata.sector_index,
s_bucket_audit_index,
error,
})?;

let (winning_chunks, best_solution_distance) = map_winning_chunks(
let Some((winning_chunks, best_solution_distance)) = map_winning_chunks(
&s_bucket,
global_challenge,
&sector_slot_challenge,
solution_range,
)?;
) else {
return Ok(None);
};

Some(AuditResult {
Ok(Some(AuditResult {
sector_index: sector_metadata.sector_index,
solution_candidates: SolutionCandidates::new(
public_key,
Expand All @@ -85,7 +99,7 @@ where
winning_chunks.into(),
),
best_solution_distance,
})
}))
}

/// Audit the whole plot and generate streams of solutions
@@ -96,7 +110,7 @@ pub fn audit_plot_sync<'a, Plot>(
plot: &'a Plot,
sectors_metadata: &'a [SectorMetadataChecksummed],
maybe_sector_being_modified: Option<SectorIndex>,
) -> Vec<AuditResult<'a, ReadAtOffset<'a, Plot>>>
) -> Result<Vec<AuditResult<'a, ReadAtOffset<'a, Plot>>>, AuditingError>
where
Plot: ReadAtSync + 'a,
{
@@ -135,14 +149,11 @@ where
&mut s_bucket,
sector_auditing_info.s_bucket_audit_offset_in_sector,
) {
warn!(
%error,
sector_index = %sector_metadata.sector_index,
s_bucket_audit_index = %sector_auditing_info.s_bucket_audit_index,
"Failed read s-bucket",
);

return None;
return Some(Err(AuditingError::SBucketReading {
sector_index: sector_metadata.sector_index,
s_bucket_audit_index: sector_auditing_info.s_bucket_audit_index,
error,
}));
}

let (winning_chunks, best_solution_distance) = map_winning_chunks(
Expand All @@ -152,7 +163,7 @@ where
solution_range,
)?;

Some(AuditResult {
Some(Ok(AuditResult {
sector_index: sector_metadata.sector_index,
solution_candidates: SolutionCandidates::new(
public_key,
@@ -163,7 +174,7 @@
winning_chunks.into(),
),
best_solution_distance,
})
}))
})
.collect()
}
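With this change `audit_sector_sync` and `audit_plot_sync` return a `Result` instead of logging a failed s-bucket read and returning nothing, so the caller decides how to react. A minimal sketch of consuming the new plot-auditing API, assuming `public_key`, `global_challenge`, `solution_range`, `plot` and `sectors_metadata` are already in scope with the types the signatures above expect (the surrounding farming loop is not taken from this diff):

// Sketch only: bindings and error handling around the new fallible API are assumptions.
match audit_plot_sync(
    public_key,
    global_challenge,
    solution_range,
    &plot,
    &sectors_metadata,
    None, // no sector is currently being modified
) {
    Ok(audit_results) => {
        for audit_result in audit_results {
            // Solution candidates feed into proving exactly as before.
            let _candidates = audit_result.solution_candidates;
        }
    }
    // An I/O failure while reading an s-bucket now surfaces here instead of
    // being logged inside the auditing code and silently swallowed.
    Err(AuditingError::SBucketReading { sector_index, error, .. }) => {
        eprintln!("Auditing failed for sector {sector_index}: {error}");
    }
}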
14 changes: 14 additions & 0 deletions crates/subspace-farmer-components/src/proving.rs
@@ -60,6 +60,20 @@ pub enum ProvingError {
RecordReadingError(#[from] ReadingError),
}

impl ProvingError {
/// Whether this error is fatal and makes the farm unusable
pub fn is_fatal(&self) -> bool {
match self {
ProvingError::InvalidErasureCodingInstance => true,
ProvingError::FailedToCreatePolynomialForRecord { .. } => false,
ProvingError::FailedToCreateChunkWitness { .. } => false,
ProvingError::FailedToDecodeSectorContentsMap(_) => false,
ProvingError::Io(_) => true,
ProvingError::RecordReadingError(error) => error.is_fatal(),
}
}
}

#[derive(Debug, Clone)]
struct WinningChunk {
/// Chunk offset within s-bucket
15 changes: 15 additions & 0 deletions crates/subspace-farmer-components/src/reading.rs
@@ -75,6 +75,21 @@ pub enum ReadingError {
ChecksumMismatch,
}

impl ReadingError {
/// Whether this error is fatal and renders the farm unusable
pub fn is_fatal(&self) -> bool {
match self {
ReadingError::FailedToReadChunk { .. } => false,
ReadingError::InvalidChunk { .. } => false,
ReadingError::FailedToErasureDecodeRecord { .. } => false,
ReadingError::WrongRecordSizeAfterDecoding { .. } => false,
ReadingError::FailedToDecodeSectorContentsMap(_) => false,
ReadingError::Io(_) => true,
ReadingError::ChecksumMismatch => false,
}
}
}

/// Record contained in the plot
#[derive(Debug, Clone)]
pub struct PlotRecord {
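Both error types now classify themselves, so the farmer can keep running after a bad chunk or checksum mismatch and only shut a farm down on genuinely fatal conditions such as I/O errors. A minimal sketch of that triage; `prove_for_slot` and `submit` are hypothetical stand-ins, not functions from this diff:

// Sketch only: triaging a proving failure with the new classification.
match prove_for_slot() {
    Ok(solution) => submit(solution),
    Err(error) if error.is_fatal() => {
        // ProvingError::Io(_) or a fatal ReadingError: the farm is considered
        // unusable, so stop farming instead of retrying.
        return Err(error.into());
    }
    Err(error) => {
        // Non-fatal (failed chunk read, decode failure, checksum mismatch, ...):
        // log it and skip this solution, the farm keeps going.
        tracing::warn!(%error, "Failed to prove, skipping solution");
    }
}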
(changed file path not shown in this view)
@@ -250,7 +250,7 @@ fn prove(
table_generator: &table_generator,
};

let mut audit_results = plot_audit.audit(options);
let mut audit_results = plot_audit.audit(options).unwrap();

group.bench_function("plot/single", |b| {
b.iter_batched(
Expand All @@ -259,7 +259,7 @@ fn prove(
return result;
}

audit_results = plot_audit.audit(options);
audit_results = plot_audit.audit(options).unwrap();

audit_results.pop().unwrap()
},
@@ -293,7 +293,7 @@
maybe_sector_being_modified: None,
table_generator: &table_generator,
};
let mut audit_results = plot_audit.audit(options);
let mut audit_results = plot_audit.audit(options).unwrap();

group.bench_function("plot/rayon", |b| {
b.iter_batched(
@@ -302,7 +302,7 @@
return result;
}

audit_results = plot_audit.audit(options);
audit_results = plot_audit.audit(options).unwrap();

audit_results.pop().unwrap()
},
42 changes: 30 additions & 12 deletions crates/subspace-farmer/src/single_disk_farm.rs
@@ -361,9 +361,6 @@ pub enum SingleDiskFarmError {
/// Failed to decode metadata header
#[error("Failed to decode metadata header: {0}")]
FailedToDecodeMetadataHeader(parity_scale_codec::Error),
/// Failed to decode sector metadata
#[error("Failed to decode sector metadata: {0}")]
FailedToDecodeSectorMetadata(parity_scale_codec::Error),
/// Unexpected metadata version
#[error("Unexpected metadata version {0}")]
UnexpectedMetadataVersion(u8),
@@ -765,12 +762,13 @@ impl SingleDiskFarm {
}
};

let metadata_file_path = directory.join(Self::METADATA_FILE);
let mut metadata_file = OpenOptions::new()
.read(true)
.write(true)
.create(true)
.advise_random_access()
.open(directory.join(Self::METADATA_FILE))?;
.open(&metadata_file_path)?;

metadata_file.advise_random_access()?;

@@ -827,14 +825,34 @@

let mut sector_metadata_bytes = vec![0; sector_metadata_size];
for sector_index in 0..metadata_header.plotted_sector_count {
metadata_file.read_exact_at(
&mut sector_metadata_bytes,
RESERVED_PLOT_METADATA + sector_metadata_size as u64 * u64::from(sector_index),
)?;
sectors_metadata.push(
SectorMetadataChecksummed::decode(&mut sector_metadata_bytes.as_ref())
.map_err(SingleDiskFarmError::FailedToDecodeSectorMetadata)?,
);
let sector_offset =
RESERVED_PLOT_METADATA + sector_metadata_size as u64 * u64::from(sector_index);
metadata_file.read_exact_at(&mut sector_metadata_bytes, sector_offset)?;

let sector_metadata =
match SectorMetadataChecksummed::decode(&mut sector_metadata_bytes.as_ref()) {
Ok(sector_metadata) => sector_metadata,
Err(error) => {
warn!(
path = %metadata_file_path.display(),
%error,
%sector_index,
"Failed to decode sector metadata, replacing with dummy expired \
sector metadata"
);

let dummy_sector = SectorMetadataChecksummed::from(SectorMetadata {
sector_index,
pieces_in_sector,
s_bucket_sizes: Box::new([0; Record::NUM_S_BUCKETS]),
history_size: HistorySize::from(SegmentIndex::ZERO),
});
metadata_file.write_all_at(&dummy_sector.encode(), sector_offset)?;

dummy_sector
}
};
sectors_metadata.push(sector_metadata);
}

Arc::new(RwLock::new(sectors_metadata))
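The sector-metadata fallback above follows the same graceful theme as the rest of the change: instead of aborting farm initialization (the removed `FailedToDecodeSectorMetadata` variant), a corrupt entry is logged and overwritten with a placeholder whose history size is the zero segment index. Presumably such a sector is treated as expired by the plotting logic and simply gets replotted, so a single damaged metadata record costs one sector rather than the whole farm.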
(remaining changed files not loaded in this view)