Skip to content

Commit

Permalink
fix: always restart logs aggregator (#1841)
Browse files Browse the repository at this point in the history
## Description:
This change strengthens the restart policy for the logs aggregator.
Prior to this, the restart only occurred on failure. Now, we make docker
attempt to always restart the logs aggregator. This should help address
#1832 where the logs
aggregator was stopped with a `137` status code but wasn't restarted.

This change also addresses a `Propagate must be provided with a cause`
panic that occurred here:
#1832. This was caused
by nil errors being propagated in the create logs collector code. This
change fixes that issue.

## Is this change user facing?
NO

## References:
#1832
#1311
  • Loading branch information
tedim52 committed Nov 22, 2023
1 parent 2377868 commit 7e6382f
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 4 deletions.
Expand Up @@ -415,14 +415,14 @@ func (backend *DockerKurtosisBackend) CreateLogsCollectorForEnclave(
if maybeLogsAggregator == nil {
logrus.Warnf("Logs aggregator container does not exist. This is unexpected as docker should have restarted the container automatically.")
logrus.Warnf("This can be fixed by restarting the engine using `kurtosis engine restart` and attempting to create the enclave again.")
return nil, stacktrace.Propagate(err, "No logs aggregator container exists. The logs collector cannot be run without a logs aggregator.")
return nil, stacktrace.NewError("No logs aggregator container exists. The logs collector cannot be run without a logs aggregator.")
}
if maybeLogsAggregator.GetStatus() != container.ContainerStatus_Running {
logrus.Warnf("Logs aggregator exists but is not running. Instead container status is '%v'. This is unexpected as docker should have restarted the container automatically.",
maybeLogsAggregator.GetStatus())
logrus.Warnf("This can be fixed by restarting the engine using `kurtosis engine restart` and attempting to create the enclave again.")
return nil, stacktrace.Propagate(err,
"The logs aggregator container exists but is not running. Instead container status is '%v'. The logs collector cannot be run without a logs aggregator.",
return nil, stacktrace.NewError(
"The logs aggregator container exists but is not running. Instead logs aggregator container status is '%v'. The logs collector cannot be run without a logs aggregator.",
maybeLogsAggregator.GetStatus(),
)
}
Expand Down
Expand Up @@ -54,7 +54,7 @@ func (vector *vectorContainerConfigProvider) GetContainerArgs(

// The logs aggregator should ALWAYS be running to ensure that no logs are lost for services in enclaves
// Thus, instruct docker to always restart the container whenever it exits, regardless of its exit status code
restartPolicy := docker_manager.RestartPolicy("on-failure")
restartPolicy := docker_manager.RestartPolicy(docker_manager.RestartAlways)

createAndStartArgs := docker_manager.NewCreateAndStartContainerArgsBuilder(
containerImage,
Expand Down
Expand Up @@ -142,6 +142,7 @@ const (
// RestartPolicy is a string type naming the restart policy applied to a container.
type RestartPolicy string

// Restart policy values.
// NOTE(review): these are untyped string constants rather than RestartPolicy values, so callers
// convert explicitly, e.g. RestartPolicy(RestartAlways).
const (
// RestartAlways asks docker to always attempt to restart the container.
RestartAlways = "always"
// RestartOnFailure presumably restarts the container only when it exits with a non-zero status code — confirm against docker docs.
RestartOnFailure = "on-failure"
// NoRestart is the empty policy string; presumably no automatic restart is applied — TODO confirm.
NoRestart = ""
)
Expand Down

0 comments on commit 7e6382f

Please sign in to comment.