Skip to content

Commit

Permalink
Merge pull request #41935 from alexisries/Issue-41871-Restore-healthc…
Browse files Browse the repository at this point in the history
…heck-at-dockerd-restart

Resume healthcheck when daemon restarts
  • Loading branch information
thaJeztah committed Oct 15, 2021
2 parents 3e15251 + 9f39889 commit a80c450
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 1 deletion.
9 changes: 8 additions & 1 deletion daemon/daemon.go
Expand Up @@ -364,7 +364,8 @@ func (daemon *Daemon) restore() error {

c.RestartManager().Cancel() // manually start containers because some need to wait for swarm networking

if c.IsPaused() && alive {
switch {
case c.IsPaused() && alive:
s, err := daemon.containerd.Status(context.Background(), c.ID)
if err != nil {
logger(c).WithError(err).Error("failed to get container status")
Expand All @@ -382,12 +383,18 @@ func (daemon *Daemon) restore() error {
c.Lock()
c.Paused = false
daemon.setStateCounter(c)
daemon.updateHealthMonitor(c)
if err := c.CheckpointTo(daemon.containersReplica); err != nil {
log.WithError(err).Error("failed to update paused container state")
}
c.Unlock()
}
}
case !c.IsPaused() && alive:
logger(c).Debug("restoring healthcheck")
c.Lock()
daemon.updateHealthMonitor(c)
c.Unlock()
}

if !alive {
Expand Down
40 changes: 40 additions & 0 deletions integration/container/restart_test.go
Expand Up @@ -8,8 +8,10 @@ import (

"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/container"
"github.com/docker/docker/client"
"github.com/docker/docker/testutil/daemon"
"gotest.tools/v3/assert"
"gotest.tools/v3/poll"
"gotest.tools/v3/skip"
)

Expand All @@ -25,6 +27,7 @@ func TestDaemonRestartKillContainers(t *testing.T) {
xRunning bool
xRunningLiveRestore bool
xStart bool
xHealthCheck bool
}

for _, tc := range []testCase{
Expand All @@ -42,6 +45,20 @@ func TestDaemonRestartKillContainers(t *testing.T) {
xRunningLiveRestore: true,
xStart: true,
},
{
desc: "container with restart=always and with healthcheck",
config: &container.Config{Image: "busybox", Cmd: []string{"top"},
Healthcheck: &container.HealthConfig{
Test: []string{"CMD-SHELL", "sleep 1"},
Interval: time.Second,
},
},
hostConfig: &container.HostConfig{RestartPolicy: container.RestartPolicy{Name: "always"}},
xRunning: true,
xRunningLiveRestore: true,
xStart: true,
xHealthCheck: true,
},
{
desc: "container created should not be restarted",
config: &container.Config{Image: "busybox", Cmd: []string{"top"}},
Expand Down Expand Up @@ -107,9 +124,32 @@ func TestDaemonRestartKillContainers(t *testing.T) {

}
assert.Equal(t, expected, running, "got unexpected running state, expected %v, got: %v", expected, running)

if c.xHealthCheck {
startTime := time.Now()
ctxPoll, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
poll.WaitOn(t, pollForNewHealthCheck(ctxPoll, client, startTime, resp.ID), poll.WithDelay(100*time.Millisecond))
}
// TODO(cpuguy83): test pause states... this seems to be rather undefined currently
})
}
}
}
}

// pollForNewHealthCheck returns a poll check that inspects the container
// identified by containerID and succeeds once the most recent entry in its
// health log has a start time after startTime. An inspect error aborts the
// poll; any other outcome asks the poller to keep waiting.
func pollForNewHealthCheck(ctx context.Context, client *client.Client, startTime time.Time, containerID string) func(log poll.LogT) poll.Result {
	return func(log poll.LogT) poll.Result {
		inspect, err := client.ContainerInspect(ctx, containerID)
		if err != nil {
			return poll.Error(err)
		}
		// Guard the pointer chain before reading the health log: without a
		// reported state or health section there is nothing to compare yet,
		// and dereferencing nil would panic instead of letting the poll
		// continue (and eventually time out with a readable message).
		if inspect.State == nil || inspect.State.Health == nil {
			return poll.Continue("waiting for container health state to be reported")
		}
		healthLog := inspect.State.Health.Log
		// Only the newest entry matters: it must have started after startTime
		// to prove that a health probe ran since that moment.
		if n := len(healthLog); n > 0 && healthLog[n-1].Start.After(startTime) {
			return poll.Success()
		}
		return poll.Continue("waiting for a new container healthcheck")
	}
}

0 comments on commit a80c450

Please sign in to comment.