Skip to content

Commit

Permalink
nexus: Assertion failure when unpublishing a faulted nexus
Browse files Browse the repository at this point in the history
The assertion occurs due to the nvmf subsystem transitioning from a
paused to an inactive state, which is forbidden. Resume the nexus
first, so that it is in an active state, before unpublishing it, which
also covers destroying a nexus. A nexus in a faulted state has no usable
replicas, so it is itself already unusable.

Repurpose the existing cargo test for a replica that is stopped, then
continued, to test the above case with a single remote replica.
  • Loading branch information
jonathan-teh committed Dec 16, 2020
1 parent b530b11 commit 190818c
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 30 deletions.
4 changes: 4 additions & 0 deletions mayastor/src/bdev/nexus/nexus_share.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,10 @@ impl Nexus {
self.unshare().await?;
}
Some(NexusTarget::NexusNvmfTarget) => {
// NVMf subsystem must be in 'active' state
// A faulted Nexus would be paused, resume it first
self.resume().await.unwrap();

self.unshare().await?;
}
None => {
Expand Down
81 changes: 51 additions & 30 deletions mayastor/tests/replica_timeout.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,33 @@
#![allow(unused_assignments)]

use common::{bdev_io, compose::Builder, MayastorTest};
use common::{compose::Builder, MayastorTest};
use mayastor::{
bdev::{nexus_create, nexus_lookup},
bdev::{nexus_create, nexus_lookup, NexusStatus},
core::MayastorCliArgs,
subsys::{Config, NvmeBdevOpts},
};
use rpc::mayastor::{BdevShareRequest, BdevUri, Null};
use rpc::mayastor::{BdevShareRequest, BdevUri, Null, ShareProtocolNexus};
use std::process::{Command, Stdio};
use tokio::time::Duration;

pub mod common;
static NXNAME: &str = "nexus";

#[ignore]
#[tokio::test]
async fn replica_stop_cont() {
// Use shorter timeouts than the default to reduce test runtime
Config::get_or_init(|| Config {
nvme_bdev_opts: NvmeBdevOpts {
action_on_timeout: 2,
timeout_us: 5_000_000,
keep_alive_timeout_ms: 5_000,
retry_count: 2,
..Default::default()
},
..Default::default()
})
.apply();
let test = Builder::new()
.name("cargo-test")
.network("10.1.0.0/16")
.add_container("ms2")
.add_container("ms1")
.with_clean(true)
.build()
Expand Down Expand Up @@ -47,27 +57,25 @@ async fn replica_stop_cont() {

let mayastor = MayastorTest::new(MayastorCliArgs::default());

// create a nexus with the remote replica as its child
mayastor
.spawn(async move {
nexus_create(
NXNAME,
1024 * 1024 * 50,
None,
&[
format!(
"nvmf://{}:8420/nqn.2019-05.io.openebs:disk0",
hdls[0].endpoint.ip()
),
format!(
"nvmf://{}:8420/nqn.2019-05.io.openebs:disk0",
hdls[1].endpoint.ip()
),
],
&[format!(
"nvmf://{}:8420/nqn.2019-05.io.openebs:disk0",
hdls[0].endpoint.ip()
)],
)
.await
.unwrap();
bdev_io::write_some(NXNAME, 0, 0xff).await.unwrap();
bdev_io::read_some(NXNAME, 0, 0xff).await.unwrap();
nexus_lookup(&NXNAME)
.unwrap()
.share(ShareProtocolNexus::NexusNvmf, None)
.await
.expect("should publish nexus over nvmf");
})
.await;

Expand All @@ -78,25 +86,38 @@ async fn replica_stop_cont() {
println!("waiting for the container to be fully suspended... {}/5", i);
}

mayastor.send(async {
// we do not determine if the IO completed with an error or not just
// that it completes.
let _ = dbg!(bdev_io::read_some(NXNAME, 0, 0xff).await);
let _ = dbg!(bdev_io::read_some(NXNAME, 0, 0xff).await);
});
// initiate the read and leave it in the background to time out
let nxuri =
format!("nvmf://127.0.0.1:8420/nqn.2019-05.io.openebs:{}", NXNAME);
Command::new("../target/debug/initiator")
.args(&[&nxuri, "read", "/tmp/tmpread"])
.stdout(Stdio::piped())
.spawn()
.expect("should send read from initiator");

println!("IO submitted unfreezing container...");

for i in 1 .. 6 {
// KATO is 5s, wait at least that long
let n = 8;
for i in 1 .. n + 1 {
ticker.tick().await;
println!("unfreeze delay... {}/5", i);
println!("unfreeze delay... {}/{}", i, n);
}
test.thaw("ms1").await.unwrap();
println!("container thawed");

// unshare the nexus while its status is faulted
mayastor
.spawn(async {
let nexus = nexus_lookup(NXNAME).unwrap();
nexus.destroy().await.unwrap();
.spawn(async move {
assert_eq!(
nexus_lookup(&NXNAME).unwrap().status(),
NexusStatus::Faulted,
);
nexus_lookup(&NXNAME)
.unwrap()
.unshare_nexus()
.await
.expect("should unpublish nexus");
})
.await;
}

0 comments on commit 190818c

Please sign in to comment.