Skip to content

Commit

Permalink
agent: Fix exec hang issues with a background process
Browse files Browse the repository at this point in the history
Issue #4747 reported, and pull request #4748 fixed, exec hang issues where
the exec command hangs when a process's stdout is not closed. However, that
PR could cause the exec command not to work as expected, leading to CI
failures, so it was reverted in #7042. This PR resolves the exec hang issues
and has undergone 1000 rounds of testing to verify that it does not cause
any CI failures.

Fixes: #4747

Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
Signed-off-by: Xuewei Niu <niuxuewei.nxw@antgroup.com>
  • Loading branch information
justxuewei committed Jul 16, 2023
1 parent f6a51a8 commit 6c91af0
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 6 deletions.
2 changes: 1 addition & 1 deletion src/agent/rustjail/src/process.rs
Expand Up @@ -161,7 +161,7 @@ impl Process {

pub fn notify_term_close(&mut self) {
let notify = self.term_exit_notifier.clone();
notify.notify_one();
notify.notify_waiters();
}

pub fn close_stdin(&mut self) {
Expand Down
17 changes: 12 additions & 5 deletions src/agent/src/rpc.rs
Expand Up @@ -595,15 +595,16 @@ impl AgentService {
let cid = req.container_id;
let eid = req.exec_id;

let mut term_exit_notifier = Arc::new(tokio::sync::Notify::new());
let term_exit_notifier;
let reader = {
let s = self.sandbox.clone();
let mut sandbox = s.lock().await;

let p = sandbox.find_container_process(cid.as_str(), eid.as_str())?;

term_exit_notifier = p.term_exit_notifier.clone();

if p.term_master.is_some() {
term_exit_notifier = p.term_exit_notifier.clone();
p.get_reader(StreamType::TermMaster)
} else if stdout {
if p.parent_stdout.is_some() {
Expand All @@ -623,16 +624,22 @@ impl AgentService {
let reader = reader.ok_or_else(|| anyhow!("cannot get stream reader"))?;

tokio::select! {
_ = term_exit_notifier.notified() => {
Err(anyhow!("eof"))
}
// Poll the futures in the order they appear, from top to bottom.
// This is very important to avoid data loss: if there is still
// data in the buffer, the read_stream branch returns
// Poll::Ready, so term_exit_notifier is never polled
// before all the data has been read.
biased;
v = read_stream(reader, req.len as usize) => {
let vector = v?;
let mut resp = ReadStreamResponse::new();
resp.set_data(vector);

Ok(resp)
}
_ = term_exit_notifier.notified() => {
Err(anyhow!("eof"))
}
}
}
}
Expand Down

0 comments on commit 6c91af0

Please sign in to comment.