Skip to content

Commit

Permalink
Repair the test_repair.sh test, handle async (#467)
Browse files Browse the repository at this point in the history
Update test_repair.sh for async: wait on each IO so we can guarantee
the order and know what to expect when the repair test finishes.

Added generation number handling to the test as well.
Print more info if a mismatch happens.

Set repair test to 100 loops.

Polish the printlns, because I just must.

Co-authored-by: Alan Hanson <alan@oxide.computer>
  • Loading branch information
leftwo and Alan Hanson committed Oct 3, 2022
1 parent 9fb9e3e commit 144d8da
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 23 deletions.
46 changes: 28 additions & 18 deletions crutest/src/main.rs
Expand Up @@ -378,7 +378,10 @@ impl WriteLog {
res = false;
}
if update {
println!("Update block {} to {}", index, new_max);
println!(
"Update block {} to {} (min:{} max:{} res:{}",
index, new_max, min, max, res
);
self.count_cur[index] = new_max;
}
res
Expand Down Expand Up @@ -856,7 +859,10 @@ async fn verify_volume(
) -> Result<()> {
assert_eq!(ri.write_log.len(), ri.total_blocks);

println!("Read and Verify all blocks (0..{})", ri.total_blocks);
println!(
"Read and Verify all blocks (0..{} range:{})",
ri.total_blocks, range
);

let pb = ProgressBar::new(ri.total_blocks as u64);
pb.set_style(ProgressStyle::default_bar()
Expand Down Expand Up @@ -893,17 +899,17 @@ async fn verify_volume(
ValidateStatus::Bad => {
pb.finish_with_message("Error");
bail!(
"Error in range {} -> {}",
"Error in block range {} -> {}",
block_index,
block_index + io_sz
block_index + next_io_blocks
);
}
ValidateStatus::InRange => {
if range {
{}
} else {
pb.finish_with_message("Error");
bail!("Error at {}", block_index);
bail!("Error at block {}", block_index);
}
}
ValidateStatus::Good => {}
Expand Down Expand Up @@ -1927,12 +1933,14 @@ async fn repair_workload(
// Any state coming in should have been verified, so we can
// consider the current write log to be the minimum possible values.
ri.write_log.commit();
let mut futureslist = Vec::new();
// TODO: Allow user to request r/w/f percentage (how???)
// We want at least one write, otherwise there will be nothing to
// repair.
let mut one_write = false;
// These help the printlns use the minimum white space
let count_width = count.to_string().len();
let block_width = ri.total_blocks.to_string().len();
let size_width = (10 * ri.block_size).to_string().len();
for c in 1..=count {
let op = rng.gen_range(0..10);
// Make sure the last few commands are not a flush
Expand All @@ -1944,8 +1952,7 @@ async fn repair_workload(
count,
width = count_width,
);
let future = guest.flush(None);
futureslist.push(future);
guest.flush(None).await?;
// Commit the current write log because we know this flush
// will make it out on at least two DS, so any writes before this
// point should also be persistent.
Expand Down Expand Up @@ -1984,36 +1991,39 @@ async fn repair_workload(
let data = Bytes::from(vec);

println!(
"{:>0width$}/{:>0width$} Write at block {:5}, len:{:7}",
"{:>0width$}/{:>0width$} Write \
block {:>bw$} len {:>sw$} data:{:>3}",
c,
count,
offset.value,
data.len(),
data[1],
width = count_width,
bw = block_width,
sw = size_width,
);
let future = guest.write(offset, data);
futureslist.push(future);
guest.write(offset, data).await?;
} else {
// Read
let length: usize = size * ri.block_size as usize;
let vec: Vec<u8> = vec![255; length];
let data = crucible::Buffer::from_vec(vec);
println!(
"{:>0width$}/{:>0width$} Read at block {:5}, len:{:7}",
"{:>0width$}/{:>0width$} Read \
block {:>bw$} len {:>sw$}",
c,
count,
offset.value,
data.len().await,
width = count_width,
bw = block_width,
sw = size_width,
);
let future = guest.read(offset, data.clone());
futureslist.push(future);
guest.read(offset, data.clone()).await?;
}
}
}
println!("loop over {} futures", futureslist.len());
crucible::join_all(futureslist).await?;

guest.show_work().await?;
Ok(())
}

Expand Down Expand Up @@ -2268,7 +2278,7 @@ async fn biggest_io_workload(
* sending jobs to the downstairs, creating dependencys that it will
* eventually resolve.
*
* TODO: Make this test use the global write count.
* TODO: Make this test use the global write count, but remember, async.
*/
async fn dep_workload(guest: &Arc<Guest>, ri: &mut RegionInfo) -> Result<()> {
let final_offset = ri.total_size - ri.block_size;
Expand Down
14 changes: 9 additions & 5 deletions tools/test_repair.sh
Expand Up @@ -92,16 +92,18 @@ fi

target_args="-t 127.0.0.1:8810 -t 127.0.0.1:8820 -t 127.0.0.1:8830"

generation=1
# Do initial volume population.
if ! ${ct} fill ${target_args} --verify-out "$verify_file" -q
if ! ${ct} fill ${target_args} --verify-out "$verify_file" -q -g "$generation"
then
echo "ERROR: Exit on initial fill"
cleanup
exit 1
fi
(( generation += 1))

# Start loop
for (( i = 0; i < 20; i += 1 )); do
for (( i = 0; i < 100; i += 1 )); do

choice=$((RANDOM % 3))
echo ""
Expand All @@ -126,12 +128,13 @@ for (( i = 0; i < 20; i += 1 )); do
ds2_pid=$!
fi

if ! ${ct} repair ${target_args} --verify-out "$verify_file" --verify-in "$verify_file" -c 30
if ! ${ct} repair ${target_args} --verify-out "$verify_file" --verify-in "$verify_file" -c 30 -g "$generation"
then
echo "Exit on repair fail, loop: $i, choice: $choice"
cleanup
exit 1
fi
(( generation += 1))

echo ""
# Stop --lossy downstairs so it can't complete all its IOs
Expand Down Expand Up @@ -167,14 +170,15 @@ for (( i = 0; i < 20; i += 1 )); do
fi

echo "Verifying data now"
echo ${ct} verify ${target_args} --verify-out "$verify_file" --verify-in "$verify_file" --range -q > "$test_log"
if ! ${ct} verify ${target_args} --verify-out "$verify_file" --verify-in "$verify_file" --range -q >> "$test_log"
echo ${ct} verify ${target_args} --verify-out "$verify_file" --verify-in "$verify_file" --range -q -g "$generation" > "$test_log"
if ! ${ct} verify ${target_args} --verify-out "$verify_file" --verify-in "$verify_file" --range -q -g "$generation" >> "$test_log" 2>&1
then
echo "Exit on verify fail, loop: $i, choice: $choice"
echo "Check $test_log for details"
cleanup
exit 1
fi
(( generation += 1))

echo "Loop: $i Downstairs dump after verify (and repair):"
${cds} dump ${dump_args[@]}
Expand Down

0 comments on commit 144d8da

Please sign in to comment.