Skip to content

Commit

Permalink
feat: interpret all docker states with shutdown (#311)
Browse files Browse the repository at this point in the history
* Interpret all docker states

Added code to interpret all Docker states and act accordingly. This is to
prevent Docker from going into undefined states with undefined behaviour.

* review comments

* clippy
  • Loading branch information
hansieodendaal committed Feb 8, 2024
1 parent 0c9c7f2 commit 0716ffe
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 22 deletions.
12 changes: 11 additions & 1 deletion cli/src/component/main_view.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
// USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//

use crossterm::terminal::disable_raw_mode;
use ratatui::{
backend::Backend,
layout::{Constraint, Direction, Layout, Rect},
Expand Down Expand Up @@ -71,7 +72,16 @@ impl Input for MainView {
state.focus_on(focus::TERMINATION);
state.update_state();

if !is_docker_running() {
// If Docker is still running, spawn a thread that force-exits the process after 60s if it has not already exited
if is_docker_running() {
std::thread::spawn(|| {
std::thread::sleep(std::time::Duration::from_secs(60));
log::warn!("The process did not stop cleanly. Terminating it.");
let _unused = disable_raw_mode();
std::process::exit(0);
});
} else {
let _unused = disable_raw_mode();
std::process::exit(0);
}
} else if matches!(event, ComponentEvent::StateChanged) {
Expand Down
1 change: 1 addition & 0 deletions cli/src/dashboard.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ impl Do<Tick> for Dashboard {
}
if state.is_terminated() {
self.stop_the_app()?;
ctx.shutdown();
}
Ok(())
}
Expand Down
49 changes: 41 additions & 8 deletions libs/sdm/src/image/task/docker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
use std::{collections::HashMap, path::Path};

use anyhow::{anyhow, Error};
use bollard::container::StopContainerOptions;
use bollard::models::ContainerStateStatusEnum;
use bollard::{
container::{
Config, CreateContainerOptions, LogOutput, LogsOptions, NetworkingConfig, RemoveContainerOptions,
Expand All @@ -32,8 +34,8 @@ use bollard::{
errors::Error as BollardError,
image::{CreateImageOptions, RemoveImageOptions},
models::{
ContainerInspectResponse, CreateImageInfo, EndpointSettings, EventMessage, EventMessageTypeEnum, HostConfig,
Mount as BollardMount, MountTypeEnum, PortBinding, PortMap,
CreateImageInfo, EndpointSettings, EventMessage, EventMessageTypeEnum, HostConfig, Mount as BollardMount,
MountTypeEnum, PortBinding, PortMap,
},
system::EventsOptions,
};
Expand Down Expand Up @@ -81,16 +83,37 @@ impl<C: ManagedProtocol> TaskContext<ImageTask<C>> {

pub async fn container_state(&mut self) -> ContainerState {
let res = self.driver.inspect_container(&self.inner.container_name, None).await;
// log::trace!("State of container {}: {:?}", self.inner.container_name, res);
match res {
Ok(ContainerInspectResponse { state: Some(state), .. }) => {
if state.running.unwrap_or_default() {
ContainerState::Running
Ok(ref response) => {
if let Some(state) = response.state.clone() {
if let Some(status) = state.status {
match status {
ContainerStateStatusEnum::EMPTY => ContainerState::Empty,
ContainerStateStatusEnum::CREATED => ContainerState::Created,
ContainerStateStatusEnum::RUNNING => ContainerState::Running,
ContainerStateStatusEnum::PAUSED => ContainerState::Paused,
ContainerStateStatusEnum::RESTARTING => ContainerState::Restarting,
ContainerStateStatusEnum::REMOVING => ContainerState::Removing,
ContainerStateStatusEnum::EXITED => ContainerState::Exited,
ContainerStateStatusEnum::DEAD => ContainerState::Dead,
}
} else {
log::error!(
"Status of container `{}` not defined: {:?}",
self.inner.container_name,
res
);
ContainerState::ErrorStatusNotDefined
}
} else {
ContainerState::NotRunning
log::error!(
"State of container `{}` not defined: {:?}",
self.inner.container_name,
res
);
ContainerState::ErrorStateNotDefined
}
},
Ok(_) => ContainerState::NotRunning,
Err(_) => ContainerState::NotFound,
}
}
Expand Down Expand Up @@ -197,6 +220,16 @@ impl<C: ManagedProtocol> TaskContext<ImageTask<C>> {
Ok(())
}

pub async fn try_stop_container(&mut self, options: Option<StopContainerOptions>) -> Result<(), Error> {
self.driver.stop_container(&self.inner.container_name, options).await?;
Ok(())
}

pub async fn try_unpause_container(&mut self) -> Result<(), Error> {
self.driver.unpause_container(&self.inner.container_name).await?;
Ok(())
}

pub async fn try_remove_container(&mut self) -> Result<(), Error> {
let opts = RemoveContainerOptions {
force: true,
Expand Down
10 changes: 9 additions & 1 deletion libs/sdm/src/image/task/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,16 @@ impl Default for Status {

/// State of a task's container as produced by `container_state`, which inspects the
/// container through the Docker driver and maps the reported status onto these variants.
#[derive(Debug, PartialEq, Eq)]
pub enum ContainerState {
/// Docker reported `ContainerStateStatusEnum::EMPTY`.
Empty,
/// Docker reported `ContainerStateStatusEnum::CREATED`.
Created,
/// Docker reported `ContainerStateStatusEnum::RUNNING`.
Running,
// NOTE(review): this looks like the older catch-all for "inspected but not running",
// superseded by the specific variants in this enum - confirm whether it is still needed.
NotRunning,
/// Docker reported `ContainerStateStatusEnum::PAUSED`.
Paused,
/// Docker reported `ContainerStateStatusEnum::RESTARTING`.
Restarting,
/// Docker reported `ContainerStateStatusEnum::REMOVING`.
Removing,
/// Docker reported `ContainerStateStatusEnum::EXITED`.
Exited,
/// Docker reported `ContainerStateStatusEnum::DEAD`.
Dead,
/// The inspect response contained a state, but that state had no status set.
ErrorStatusNotDefined,
/// The inspect response contained no state at all.
ErrorStateNotDefined,
/// Inspecting the container returned an error.
NotFound,
}

Expand Down
117 changes: 105 additions & 12 deletions libs/sdm/src/image/task/update.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,41 +95,134 @@ impl<C: ManagedProtocol> TaskContext<ImageTask<C>> {
);
let state = self.container_state().await;
match state {
ContainerState::Running => {
ContainerState::Running | ContainerState::Restarting => {
log::debug!(
"[Clean dangling] Container {} is running. Terminating it.",
self.inner.container_name
"[Clean dangling] Container `{}` is `{:?}`. Trying to stop it.",
self.inner.container_name,
state
);
self.try_stop_container(None).await?;
self.try_kill_container().await?;
self.status.set(Status::WaitContainerKilled);
},
ContainerState::NotRunning => {
ContainerState::Paused => {
log::debug!(
"[Clean dangling] Container {} is not running. Removing it.",
self.inner.container_name
"[Clean dangling] Container `{}` is `{:?}`. Trying to stop it.",
self.inner.container_name,
state
);
self.try_unpause_container().await?;
self.try_stop_container(None).await?;
self.try_kill_container().await?;
self.status.set(Status::WaitContainerKilled);
},
ContainerState::Exited | ContainerState::Empty | ContainerState::Created => {
log::debug!(
"[Clean dangling] Container `{}` is `{:?}`. Trying to remove it.",
self.inner.container_name,
state
);
self.try_remove_container().await?;
self.status.set(Status::WaitContainerRemoved);
},
ContainerState::NotFound => {
ContainerState::NotFound | ContainerState::Dead | ContainerState::Removing => {
log::debug!(
"[Clean dangling] Container {} doesn't exist.",
self.inner.container_name
"[Clean dangling] Container `{}` is `{:?}`. Doing nothing.",
self.inner.container_name,
state
);
self.status.set(Status::Idle);
self.update_task_status(TaskStatus::Inactive)?;
},
ContainerState::ErrorStateNotDefined | ContainerState::ErrorStatusNotDefined => {
log::debug!(
"[Clean dangling] Container `{}` is `{:?}`. Retry cleaning up.",
self.inner.container_name,
state
);
self.status.set(Status::CleanDangling);
},
}
self.update_task_status(TaskStatus::Inactive)?;
Ok(())
}

/// Waits for the container to disappear after it has been asked to stop.
///
/// The container state is polled after each 100 ms sleep. As soon as it is reported as
/// `NotFound` or `Dead`, the task status is set to `Status::Idle`. If neither state shows
/// up before the retry budget of 30 is used up (about 3 seconds), a warning is logged and
/// the status goes back to `Status::CleanDangling` so that clean-up is attempted again.
///
/// The container state is also queried and logged once on entry and once on exit.
async fn do_wait_container_killed(&mut self) -> Result<(), Error> {
    // TODO: Wait interval
    const POLL_INTERVAL: tokio::time::Duration = tokio::time::Duration::from_millis(100);
    // 30 retries at 100 ms each: 3 seconds
    const MAX_RETRIES: u32 = 30;

    let entry_state = self.container_state().await;
    log::debug!(
        "[Clean dangling] `do_wait_container_killed` for container `{}` enter state: `{:?}`",
        self.inner.container_name,
        entry_state
    );

    let mut retries = 0;
    // The loop yields the status the task should move to next.
    let next_status = loop {
        tokio::time::sleep(POLL_INTERVAL).await;
        let current = self.container_state().await;
        log::debug!(
            "[Clean dangling] `do_wait_container_killed` for container `{}` state: `{:?}`",
            self.inner.container_name,
            current
        );
        if matches!(current, ContainerState::NotFound | ContainerState::Dead) {
            break Status::Idle;
        }
        if retries >= MAX_RETRIES {
            log::warn!(
                "[Clean dangling] Container `{}` did not stop in time. Retry cleaning up.",
                self.inner.container_name
            );
            break Status::CleanDangling;
        }
        retries += 1;
    };
    self.status.set(next_status);

    let exit_state = self.container_state().await;
    log::debug!(
        "[Clean dangling] `do_wait_container_killed` for container `{}`, exit state: `{:?}`",
        self.inner.container_name,
        exit_state
    );
    Ok(())
}

/// Waits for the container to be removed after a removal request.
///
/// The container state is polled after each 100 ms sleep. As soon as it is reported as
/// `NotFound`, `Dead` or `Removing`, the task status is set to `Status::Idle`. If none of
/// these shows up before the retry budget of 30 is used up (about 3 seconds), a warning is
/// logged and the status goes back to `Status::CleanDangling` so that clean-up is attempted
/// again.
///
/// The container state is also queried and logged once on entry and once on exit.
async fn do_wait_container_removed(&mut self) -> Result<(), Error> {
    // TODO: Wait interval
    const POLL_INTERVAL: tokio::time::Duration = tokio::time::Duration::from_millis(100);
    // 30 retries at 100 ms each: 3 seconds
    const MAX_RETRIES: u32 = 30;

    let entry_state = self.container_state().await;
    log::debug!(
        "[Clean dangling] `do_wait_container_removed` for container `{}`, enter state: `{:?}`",
        self.inner.container_name,
        entry_state
    );

    let mut retries = 0;
    // The loop yields the status the task should move to next.
    let next_status = loop {
        tokio::time::sleep(POLL_INTERVAL).await;
        let current = self.container_state().await;
        log::debug!(
            "[Clean dangling] `do_wait_container_removed` for container `{}` state: `{:?}`",
            self.inner.container_name,
            current
        );
        if matches!(
            current,
            ContainerState::NotFound | ContainerState::Dead | ContainerState::Removing
        ) {
            break Status::Idle;
        }
        if retries >= MAX_RETRIES {
            log::warn!(
                "[Clean dangling] Container {} was not removed in time. Retry cleaning up.",
                self.inner.container_name
            );
            break Status::CleanDangling;
        }
        retries += 1;
    };
    self.status.set(next_status);

    let exit_state = self.container_state().await;
    log::debug!(
        "[Clean dangling] `do_wait_container_removed` for container `{}`, exit status: `{:?}`",
        self.inner.container_name,
        exit_state
    );
    Ok(())
}

Expand Down

0 comments on commit 0716ffe

Please sign in to comment.