Skip to content

Commit

Permalink
Merge pull request #40894 from cpuguy83/health_start_interval
Browse files Browse the repository at this point in the history
Add health start interval
  • Loading branch information
neersighted committed Jul 6, 2023
2 parents e77f9d2 + 2216d3c commit e4c866f
Show file tree
Hide file tree
Showing 8 changed files with 112 additions and 6 deletions.
8 changes: 8 additions & 0 deletions api/server/router/container/container_routes.go
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,14 @@ func (s *containerRouter) postContainersCreate(ctx context.Context, w http.Respo
bo.CreateMountpoint = false
}
}

}

if hostConfig != nil && versions.LessThan(version, "1.44") {
if config.Healthcheck != nil {
// StartInterval was added in API 1.44
config.Healthcheck.StartInterval = 0
}
}

if hostConfig != nil && versions.GreaterThanOrEqualTo(version, "1.42") {
Expand Down
6 changes: 6 additions & 0 deletions api/swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,12 @@ definitions:
1000000 (1 ms). 0 means inherit.
type: "integer"
format: "int64"
StartInterval:
description: |
The time to wait between checks in nanoseconds during the start period.
It should be 0 or at least 1000000 (1 ms). 0 means inherit.
type: "integer"
format: "int64"

Health:
description: |
Expand Down
7 changes: 4 additions & 3 deletions api/types/container/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,10 @@ type HealthConfig struct {
Test []string `json:",omitempty"`

// Zero means to inherit. Durations are expressed as integer nanoseconds.
Interval time.Duration `json:",omitempty"` // Interval is the time to wait between checks.
Timeout time.Duration `json:",omitempty"` // Timeout is the time to wait before considering the check to have hung.
StartPeriod time.Duration `json:",omitempty"` // The start period for the container to initialize before the retries starts to count down.
Interval time.Duration `json:",omitempty"` // Interval is the time to wait between checks.
Timeout time.Duration `json:",omitempty"` // Timeout is the time to wait before considering the check to have hung.
StartPeriod time.Duration `json:",omitempty"` // The start period for the container to initialize before the retries starts to count down.
StartInterval time.Duration `json:",omitempty"` // The interval to attempt healthchecks at during the start period

// Retries is the number of consecutive failures needed to consider a container as unhealthy.
// Zero means inherit.
Expand Down
3 changes: 3 additions & 0 deletions client/container_create.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ func (cli *Client) ContainerCreate(ctx context.Context, config *container.Config
if err := cli.NewVersionError("1.41", "specify container image platform"); platform != nil && err != nil {
return response, err
}
if err := cli.NewVersionError("1.44", "specify health-check start interval"); config != nil && config.Healthcheck != nil && config.Healthcheck.StartInterval != 0 && err != nil {
return response, err
}

if hostConfig != nil {
if versions.LessThan(cli.ClientVersion(), "1.25") {
Expand Down
3 changes: 3 additions & 0 deletions daemon/commit.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ func merge(userConf, imageConf *containertypes.Config) error {
if userConf.Healthcheck.StartPeriod == 0 {
userConf.Healthcheck.StartPeriod = imageConf.Healthcheck.StartPeriod
}
if userConf.Healthcheck.StartInterval == 0 {
userConf.Healthcheck.StartInterval = imageConf.Healthcheck.StartInterval
}
if userConf.Healthcheck.Retries == 0 {
userConf.Healthcheck.Retries = imageConf.Healthcheck.Retries
}
Expand Down
25 changes: 22 additions & 3 deletions daemon/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -248,13 +248,31 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch
// There is never more than one monitor thread running per container at a time.
func monitor(d *Daemon, c *container.Container, stop chan struct{}, probe probe) {
probeInterval := timeoutWithDefault(c.Config.Healthcheck.Interval, defaultProbeInterval)
startInterval := timeoutWithDefault(c.Config.Healthcheck.StartInterval, defaultProbeInterval)
startPeriod := timeoutWithDefault(c.Config.Healthcheck.StartPeriod, defaultStartPeriod)

intervalTimer := time.NewTimer(probeInterval)
c.Lock()
started := c.State.StartedAt
c.Unlock()

getInterval := func() time.Duration {
if time.Since(started) >= startPeriod {
return probeInterval
}
c.Lock()
status := c.Health.Health.Status
c.Unlock()

if status == types.Starting {
return startInterval
}
return probeInterval
}

intervalTimer := time.NewTimer(getInterval())
defer intervalTimer.Stop()

for {
intervalTimer.Reset(probeInterval)

select {
case <-stop:
log.G(context.TODO()).Debugf("Stop healthcheck monitoring for container %s (received while idle)", c.ID)
Expand Down Expand Up @@ -296,6 +314,7 @@ func monitor(d *Daemon, c *container.Container, stop chan struct{}, probe probe)
cancelProbe()
}
}
intervalTimer.Reset(getInterval())
}
}

Expand Down
2 changes: 2 additions & 0 deletions docs/api/version-history.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ keywords: "API, Docker, rcli, REST, documentation"
with runtimes which support the feature.
`POST /containers/create`, `GET /containers/{id}/json`, and `GET /containers/json` now support
`BindOptions.ReadOnlyNonRecursive` and `BindOptions.ReadOnlyForceRecursive` to customize the behavior.
* `POST /containers/create` now accepts a `HealthConfig.StartInterval` to set the
interval for health checks during the start period.

## v1.43 API changes

Expand Down
64 changes: 64 additions & 0 deletions integration/container/health_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,70 @@ func TestHealthCheckProcessKilled(t *testing.T) {
poll.WaitOn(t, pollForHealthCheckLog(ctx, apiClient, cID, "Health check exceeded timeout (50ms): logs logs logs\n"))
}

// TestHealthStartInterval exercises the healthcheck StartInterval setting.
//
// The container's probe keeps a counter in /tmp/health and exits non-zero
// until the counter reaches 3. With Interval and StartPeriod both set to 30s
// and StartInterval set to 1s, the test first requires the container to
// report "healthy" within 30s, and then waits (up to 2 minutes) until two
// consecutive health log entries are at least Interval apart.
func TestHealthStartInterval(t *testing.T) {
	skip.If(t, testEnv.DaemonInfo.OSType == "windows", "The shell commands used in the test healthcheck do not work on Windows")
	defer setupTest(t)()
	ctx := context.Background()
	apiClient := testEnv.APIClient()

	healthcheck := &containertypes.HealthConfig{
		Test:          []string{"CMD-SHELL", `count="$(cat /tmp/health)"; if [ -z "${count}" ]; then let count=0; fi; let count=${count}+1; echo -n ${count} | tee /tmp/health; if [ ${count} -lt 3 ]; then exit 1; fi`},
		Interval:      30 * time.Second,
		StartInterval: time.Second,
		StartPeriod:   30 * time.Second,
	}
	cID := container.Run(ctx, t, apiClient, func(c *container.TestContainerConfig) {
		c.Config.Healthcheck = healthcheck
	})

	// Phase 1: the container has to turn healthy before this context expires.
	healthyCtx, cancelHealthy := context.WithTimeout(ctx, 30*time.Second)
	defer cancelHealthy()
	healthyDeadline, _ := healthyCtx.Deadline()

	waitHealthy := func(log poll.LogT) poll.Result {
		if err := healthyCtx.Err(); err != nil {
			return poll.Error(err)
		}
		inspect, err := apiClient.ContainerInspect(healthyCtx, cID)
		if err != nil {
			return poll.Error(err)
		}
		health := inspect.State.Health
		if health.Status == "healthy" {
			return poll.Success()
		}
		// Surface the most recent probe result while we keep waiting.
		if n := len(health.Log); n > 0 {
			t.Log(health.Log[n-1])
		}
		return poll.Continue("waiting on container to be ready")
	}
	poll.WaitOn(t, waitHealthy, poll.WithDelay(100*time.Millisecond), poll.WithTimeout(time.Until(healthyDeadline)))
	cancelHealthy()

	// Phase 2: wait for the spacing between probes to reach the regular interval.
	switchCtx, cancelSwitch := context.WithTimeout(ctx, 2*time.Minute)
	defer cancelSwitch()
	switchDeadline, _ := switchCtx.Deadline()

	waitRegularInterval := func(log poll.LogT) poll.Result {
		inspect, err := apiClient.ContainerInspect(switchCtx, cID)
		if err != nil {
			return poll.Error(err)
		}

		entries := inspect.State.Health.Log
		n := len(entries)
		if n < 2 {
			return poll.Continue("waiting for more healthcheck results")
		}

		// Distance between the start times of the two newest probes.
		gap := entries[n-1].Start.Sub(entries[n-2].Start)
		if gap >= inspect.Config.Healthcheck.Interval {
			return poll.Success()
		}
		t.Log(gap)
		return poll.Continue("waiting for health check interval to switch from the start interval")
	}
	poll.WaitOn(t, waitRegularInterval, poll.WithDelay(time.Second), poll.WithTimeout(time.Until(switchDeadline)))
}

func pollForHealthCheckLog(ctx context.Context, client client.APIClient, containerID string, expected string) func(log poll.LogT) poll.Result {
return func(log poll.LogT) poll.Result {
inspect, err := client.ContainerInspect(ctx, containerID)
Expand Down

0 comments on commit e4c866f

Please sign in to comment.