Skip to content

Commit

Permalink
UPSTREAM: <carry>: provide events, messages, and bodies for probe fai…
Browse files Browse the repository at this point in the history
…lures of important pods

UPSTREAM: <carry>: provide unique reason for pod probe event during termination

OpenShift-Rebase-Source: 01542fc
  • Loading branch information
deads2k authored and sairameshv committed Dec 4, 2023
1 parent 4564b2d commit 788722b
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 8 deletions.
56 changes: 56 additions & 0 deletions pkg/kubelet/prober/patch_prober.go
@@ -0,0 +1,56 @@
package prober

import (
"net/http"
"strings"
"time"

v1 "k8s.io/api/core/v1"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/probe"
httpprobe "k8s.io/kubernetes/pkg/probe/http"
)

func (pb *prober) maybeProbeForBody(prober httpprobe.Prober, req *http.Request, timeout time.Duration, pod *v1.Pod, container v1.Container, probeType probeType) (probe.Result, string, error) {
if !isInterestingPod(pod) {
return prober.Probe(req, timeout)
}
bodyProber, ok := prober.(httpprobe.DetailedProber)
if !ok {
return prober.Probe(req, timeout)
}
result, output, body, probeError := bodyProber.ProbeForBody(req, timeout)
switch result {
case probe.Success:
return result, output, probeError
case probe.Warning, probe.Failure, probe.Unknown:
// these pods are interesting enough to show the body content
klog.Infof("interesting pod/%s container/%s namespace/%s: %s probe status=%v output=%q start-of-body=%s",
pod.Name, container.Name, pod.Namespace, probeType, result, output, body)

reason := "ProbeError" // this is the normal value
if pod.DeletionTimestamp != nil {
// If the container was sent a sig-term, we want to have a different reason so we can distinguish this in our
// monitoring and watching code.
// Pod delete does this, but there are other possible reasons as well. We'll start with pod delete to improve the state of the world.
reason = "TerminatingPodProbeError"
}

// in fact, they are so interesting we'll try to send events for them
pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, reason, "%s probe error: %s\nbody: %s\n", probeType, output, body)
return result, output, probeError
default:
return result, output, probeError
}
}

func isInterestingPod(pod *v1.Pod) bool {
if pod == nil {
return false
}
if strings.HasPrefix(pod.Namespace, "openshift-") {
return true
}

return false
}
2 changes: 1 addition & 1 deletion pkg/kubelet/prober/prober.go
Expand Up @@ -153,7 +153,7 @@ func (pb *prober) runProbe(ctx context.Context, probeType probeType, p *v1.Probe
headers := p.HTTPGet.HTTPHeaders
klogV4.InfoS("HTTP-Probe", "scheme", scheme, "host", host, "port", port, "path", path, "timeout", timeout, "headers", headers)
}
return pb.http.Probe(req, timeout)
return pb.maybeProbeForBody(pb.http, req, timeout, pod, container, probeType)
}
if p.TCPSocket != nil {
port, err := probe.ResolveContainerPort(p.TCPSocket.Port, &container)
Expand Down
16 changes: 9 additions & 7 deletions pkg/probe/http/http.go
Expand Up @@ -78,7 +78,8 @@ func (pr httpProber) Probe(req *http.Request, timeout time.Duration) (probe.Resu
Transport: pr.transport,
CheckRedirect: RedirectChecker(pr.followNonLocalRedirects),
}
return DoHTTPProbe(req, client)
result, details, _, err := DoHTTPProbe(req, client)
return result, details, err
}

// GetHTTPInterface is an interface for making HTTP requests, that returns a response and error.
Expand All @@ -90,36 +91,37 @@ type GetHTTPInterface interface {
// If the HTTP response code is successful (i.e. 400 > code >= 200), it returns Success.
// If the HTTP response code is unsuccessful or HTTP communication fails, it returns Failure.
// This is exported because some other packages may want to do direct HTTP probes.
func DoHTTPProbe(req *http.Request, client GetHTTPInterface) (probe.Result, string, error) {
func DoHTTPProbe(req *http.Request, client GetHTTPInterface) (probe.Result, string, string, error) {
url := req.URL
headers := req.Header
res, err := client.Do(req)
if err != nil {
// Convert errors into failures to catch timeouts.
return probe.Failure, err.Error(), nil
return probe.Failure, err.Error(), "", nil
}
defer res.Body.Close()
b, err := utilio.ReadAtMost(res.Body, maxRespBodyLength)
if err != nil {
if err == utilio.ErrLimitReached {
klog.V(4).Infof("Non fatal body truncation for %s, Response: %v", url.String(), *res)
} else {
return probe.Failure, "", err
return probe.Failure, "", "", err
}
}
body := string(b)
if res.StatusCode >= http.StatusOK && res.StatusCode < http.StatusBadRequest {
if res.StatusCode >= http.StatusMultipleChoices { // Redirect
klog.V(4).Infof("Probe terminated redirects for %s, Response: %v", url.String(), *res)
return probe.Warning, fmt.Sprintf("Probe terminated redirects, Response body: %v", body), nil
return probe.Warning, fmt.Sprintf("Probe terminated redirects, Response body: %v", body), body, nil
}
klog.V(4).Infof("Probe succeeded for %s, Response: %v", url.String(), *res)
return probe.Success, body, nil
return probe.Success, body, body, nil
}
klog.V(4).Infof("Probe failed for %s with request headers %v, response body: %v", url.String(), headers, body)
// Note: Until https://issue.k8s.io/99425 is addressed, this user-facing failure message must not contain the response body.
// @deads2k recommended we return the body. Slack discussion: https://redhat-internal.slack.com/archives/C04UQLWQAP3/p1679590747021409
failureMsg := fmt.Sprintf("HTTP probe failed with statuscode: %d", res.StatusCode)
return probe.Failure, failureMsg, nil
return probe.Failure, failureMsg, body, nil
}

// RedirectChecker returns a function that can be used to check HTTP redirects.
Expand Down
25 changes: 25 additions & 0 deletions pkg/probe/http/patch_http.go
@@ -0,0 +1,25 @@
package http

import (
"net/http"
"time"

"k8s.io/kubernetes/pkg/probe"
)

// Prober is an interface that defines the Probe function for doing HTTP readiness/liveness checks.
type DetailedProber interface {
ProbeForBody(req *http.Request, timeout time.Duration) (probe.Result, string, string, error)
}

// ProbeForBody returns a ProbeRunner capable of running an HTTP check.
// returns result, details, body, error
func (pr httpProber) ProbeForBody(req *http.Request, timeout time.Duration) (probe.Result, string, string, error) {
pr.transport.DisableCompression = true // removes Accept-Encoding header
client := &http.Client{
Timeout: timeout,
Transport: pr.transport,
CheckRedirect: RedirectChecker(pr.followNonLocalRedirects),
}
return DoHTTPProbe(req, client)
}

0 comments on commit 788722b

Please sign in to comment.