Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add timeouts to HealthChecks and retry checks #3593

Merged
merged 1 commit into from
Feb 5, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions pkg/api/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,8 @@ type Probe struct {
Handler `json:",inline"`
// Length of time before health checking is activated. In seconds.
InitialDelaySeconds int64 `json:"initialDelaySeconds,omitempty"`
// Length of time before health checking times out. In seconds.
TimeoutSeconds int64 `json:"timeoutSeconds,omitempty"`
}

// PullPolicy describes a policy for if/when to pull a container image
Expand Down
2 changes: 2 additions & 0 deletions pkg/api/v1beta1/conversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -911,6 +911,7 @@ func init() {
return err
}
out.InitialDelaySeconds = in.InitialDelaySeconds
out.TimeoutSeconds = in.TimeoutSeconds
return nil
},
func(in *LivenessProbe, out *newer.Probe, s conversion.Scope) error {
Expand All @@ -924,6 +925,7 @@ func init() {
return err
}
out.InitialDelaySeconds = in.InitialDelaySeconds
out.TimeoutSeconds = in.TimeoutSeconds
return nil
},
)
Expand Down
2 changes: 2 additions & 0 deletions pkg/api/v1beta1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,8 @@ type LivenessProbe struct {
Exec *ExecAction `json:"exec,omitempty" description:"parameters for exec-based liveness probe"`
// Length of time before health checking is activated. In seconds.
InitialDelaySeconds int64 `json:"initialDelaySeconds,omitempty" description:"number of seconds after the container has started before liveness probes are initiated"`
// Length of time before health checking times out. In seconds.
TimeoutSeconds int64 `json:"timeoutSeconds,omitempty" description:"number of seconds after which liveness probes timeout; defaults to 1 second"`
}

// PullPolicy describes a policy for if/when to pull a container image
Expand Down
2 changes: 2 additions & 0 deletions pkg/api/v1beta2/conversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -824,6 +824,7 @@ func init() {
return err
}
out.InitialDelaySeconds = in.InitialDelaySeconds
out.TimeoutSeconds = in.TimeoutSeconds
return nil
},
func(in *LivenessProbe, out *newer.Probe, s conversion.Scope) error {
Expand All @@ -837,6 +838,7 @@ func init() {
return err
}
out.InitialDelaySeconds = in.InitialDelaySeconds
out.TimeoutSeconds = in.TimeoutSeconds
return nil
},
)
Expand Down
2 changes: 2 additions & 0 deletions pkg/api/v1beta2/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,8 @@ type LivenessProbe struct {
Exec *ExecAction `json:"exec,omitempty" description:"parameters for exec-based liveness probe"`
// Length of time before health checking is activated. In seconds.
InitialDelaySeconds int64 `json:"initialDelaySeconds,omitempty" description:"number of seconds after the container has started before liveness probes are initiated"`
// Length of time before health checking times out. In seconds.
TimeoutSeconds int64 `json:"timeoutSeconds,omitempty" description:"number of seconds after which liveness probes timeout; defaults to 1 second"`
}

// PullPolicy describes a policy for if/when to pull a container image
Expand Down
2 changes: 2 additions & 0 deletions pkg/api/v1beta3/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,8 @@ type Probe struct {
Handler `json:",inline"`
// Length of time before health checking is activated. In seconds.
InitialDelaySeconds int64 `json:"initialDelaySeconds,omitempty"`
// Length of time before health checking times out. In seconds.
TimeoutSeconds int64 `json:"timeoutSeconds,omitempty"`
}

// PullPolicy describes a policy for if/when to pull a container image
Expand Down
11 changes: 9 additions & 2 deletions pkg/kubelet/kubelet.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ const defaultChanSize = 1024
const minShares = 2
const sharesPerCPU = 1024
const milliCPUToCPU = 1000
const maxRetries int = 3

// SyncHandler is an interface implemented by Kubelet, for testability
type SyncHandler interface {
Expand Down Expand Up @@ -1417,15 +1418,21 @@ func (kl *Kubelet) GetPodStatus(podFullName string, uid types.UID) (api.PodStatu
return podStatus, err
}

func (kl *Kubelet) probeLiveness(podFullName string, podUID types.UID, status api.PodStatus, container api.Container, dockerContainer *docker.APIContainers) (probe.Status, error) {
func (kl *Kubelet) probeLiveness(podFullName string, podUID types.UID, status api.PodStatus, container api.Container, dockerContainer *docker.APIContainers) (healthStatus probe.Status, err error) {
// Give the container 60 seconds to start up.
if container.LivenessProbe == nil {
return probe.Success, nil
}
if time.Now().Unix()-dockerContainer.Created < container.LivenessProbe.InitialDelaySeconds {
return probe.Success, nil
}
return kl.probeContainer(container.LivenessProbe, podFullName, podUID, status, container)
for i := 0; i < maxRetries; i++ {
healthStatus, err = kl.probeContainer(container.LivenessProbe, podFullName, podUID, status, container)
if healthStatus == probe.Success {
return
}
}
return healthStatus, err
}

// Returns logs of current machine.
Expand Down
14 changes: 12 additions & 2 deletions pkg/kubelet/probe.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package kubelet
import (
"fmt"
"strconv"
"time"

"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
Expand All @@ -39,6 +40,14 @@ var (
)

func (kl *Kubelet) probeContainer(p *api.Probe, podFullName string, podUID types.UID, status api.PodStatus, container api.Container) (probe.Status, error) {
var timeout time.Duration
secs := container.LivenessProbe.TimeoutSeconds
if secs > 0 {
timeout = time.Duration(secs) * time.Second
} else {
timeout = 1 * time.Second
}

if p.Exec != nil {
return execprober.Probe(kl.newExecInContainer(podFullName, podUID, container))
}
Expand All @@ -47,14 +56,15 @@ func (kl *Kubelet) probeContainer(p *api.Probe, podFullName string, podUID types
if err != nil {
return probe.Unknown, err
}
return httprober.Probe(extractGetParams(p.HTTPGet, status, port))
host, port, path := extractGetParams(p.HTTPGet, status, port)
return httprober.Probe(host, port, path, timeout)
}
if p.TCPSocket != nil {
port, err := extractPort(p.TCPSocket.Port, container)
if err != nil {
return probe.Unknown, err
}
return tcprober.Probe(status.PodIP, port)
return tcprober.Probe(status.PodIP, port, timeout)
}
glog.Warningf("Failed to find probe builder for %s %+v", container.Name, container.LivenessProbe)
return probe.Unknown, nil
Expand Down
10 changes: 6 additions & 4 deletions pkg/probe/http/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,25 @@ import (
"net/http"
"net/url"
"strconv"
"time"

"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"

"github.com/golang/glog"
)

func New() HTTPProber {
return HTTPProber{&http.Client{}}
transport := &http.Transport{}
return HTTPProber{transport}
}

type HTTPProber struct {
client HTTPGetInterface
transport *http.Transport
}

// Probe returns a ProbeRunner capable of running an http check.
func (pr *HTTPProber) Probe(host string, port int, path string) (probe.Status, error) {
return DoHTTPProbe(formatURL(host, port, path), pr.client)
func (pr *HTTPProber) Probe(host string, port int, path string, timeout time.Duration) (probe.Status, error) {
return DoHTTPProbe(formatURL(host, port, path), &http.Client{Timeout: timeout, Transport: pr.transport})
}

type HTTPGetInterface interface {
Expand Down
22 changes: 14 additions & 8 deletions pkg/probe/http/http_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"net/url"
"strconv"
"testing"
"time"

"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
)
Expand All @@ -46,20 +47,25 @@ func TestFormatURL(t *testing.T) {
}

func TestHTTPProbeChecker(t *testing.T) {
handleReq := func(s int) func(w http.ResponseWriter) {
return func(w http.ResponseWriter) { w.WriteHeader(s) }
}

prober := New()
testCases := []struct {
status int
health probe.Status
handler func(w http.ResponseWriter)
health probe.Status
}{
// The probe will be filled in below. This is primarily testing that an HTTP GET happens.
{http.StatusOK, probe.Success},
{-1, probe.Failure},
{handleReq(http.StatusOK), probe.Success},
{handleReq(-1), probe.Failure},
{func(w http.ResponseWriter) { time.Sleep(3 * time.Second) }, probe.Failure},
}
for _, test := range testCases {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(test.status)
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
test.handler(w)
}))
u, err := url.Parse(ts.URL)
u, err := url.Parse(server.URL)
if err != nil {
t.Errorf("Unexpected error: %v", err)
}
Expand All @@ -71,7 +77,7 @@ func TestHTTPProbeChecker(t *testing.T) {
if err != nil {
t.Errorf("Unexpected error: %v", err)
}
health, err := prober.Probe(host, p, "")
health, err := prober.Probe(host, p, "", 1*time.Second)
if test.health == probe.Unknown && err == nil {
t.Errorf("Expected error")
}
Expand Down
9 changes: 5 additions & 4 deletions pkg/probe/tcp/tcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package tcp
import (
"net"
"strconv"
"time"

"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"

Expand All @@ -31,16 +32,16 @@ func New() TCPProber {

type TCPProber struct{}

func (pr TCPProber) Probe(host string, port int) (probe.Status, error) {
return DoTCPProbe(net.JoinHostPort(host, strconv.Itoa(port)))
func (pr TCPProber) Probe(host string, port int, timeout time.Duration) (probe.Status, error) {
return DoTCPProbe(net.JoinHostPort(host, strconv.Itoa(port)), timeout)
}

// DoTCPProbe checks that a TCP socket to the address can be opened.
// If the socket can be opened, it returns Success
// If the socket fails to open, it returns Failure.
// This is exported because some other packages may want to do direct TCP probes.
func DoTCPProbe(addr string) (probe.Status, error) {
conn, err := net.Dial("tcp", addr)
func DoTCPProbe(addr string, timeout time.Duration) (probe.Status, error) {
conn, err := net.DialTimeout("tcp", addr, timeout)
if err != nil {
return probe.Failure, nil
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/probe/tcp/tcp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"net/url"
"strconv"
"testing"
"time"

"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
)
Expand Down Expand Up @@ -59,7 +60,7 @@ func TestTcpHealthChecker(t *testing.T) {
if !test.usePort {
p = -1
}
status, err := prober.Probe(host, p)
status, err := prober.Probe(host, p, 1*time.Second)
if status != test.expectedStatus {
t.Errorf("expected: %v, got: %v", test.expectedStatus, status)
}
Expand Down