Skip to content

Commit

Permalink
Merge pull request kubernetes#105451 from claudiubelu/tests/log-pod-logs
Browse files Browse the repository at this point in the history
tests: Fetch the pod logs in failed cases
  • Loading branch information
k8s-ci-robot committed Nov 18, 2021
2 parents 203d145 + afb8569 commit 51b94de
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 6 deletions.
74 changes: 69 additions & 5 deletions test/e2e/framework/pod/resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package pod
import (
"context"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"time"
Expand All @@ -43,6 +45,11 @@ import (
// the pod has already reached completed state.
var errPodCompleted = fmt.Errorf("pod ran to completion")

// LabelLogOnPodFailure can be used to mark which Pods will have their logs logged in the case of
// a test failure. By default, if there are no Pods with this label, only the first 5 Pods will
// have their logs fetched.
const LabelLogOnPodFailure = "log-on-pod-failure"

// TODO: Move to its own subpkg.
// expectNoError checks if "err" is set, and if so, fails assertion while logging the error.
func expectNoError(err error, explain ...interface{}) {
Expand Down Expand Up @@ -405,14 +412,68 @@ func logPodTerminationMessages(pods []v1.Pod) {
}
}

// logPodLogs writes the tail of the container logs of pods in the given namespace to files under
// reportDir. This can be helpful for debugging issues that do not cause the container to fail
// (e.g.: network connectivity issues).
//
// Pods carrying the LabelLogOnPodFailure label are selected. If none of the given pods has the
// label, the first 5 pods of the list are used instead. Only the containers listed in
// pod.Spec.Containers are considered, and the request for each of them is limited to 42 tail lines.
//
// Nothing is done when reportDir is empty. Otherwise the logs of each container are written to:
// {reportDir}/{namespace}/{pod}/{container_name}/logs.txt
//
// Failures to fetch the logs, create the directory or write the file are logged and the
// container is skipped; nothing is returned to the caller.
func logPodLogs(c clientset.Interface, namespace string, pods []v1.Pod, reportDir string) {
	const (
		// defaultMaxPods caps how many pods are logged when no pod carries the label.
		defaultMaxPods = 5
		// tailLines is the number of trailing log lines requested per container.
		tailLines = 42
	)

	if reportDir == "" {
		return
	}

	var logPods []v1.Pod
	for _, pod := range pods {
		if _, ok := pod.Labels[LabelLogOnPodFailure]; ok {
			logPods = append(logPods, pod)
		}
	}

	// There are no pods with the LabelLogOnPodFailure label, default to the first few pods.
	if len(logPods) == 0 {
		logPods = pods
		if len(logPods) > defaultMaxPods {
			logPods = logPods[:defaultMaxPods]
		}
	}

	// getPodLogsInternal takes the tail length by pointer, so it needs an addressable variable.
	tailLen := tailLines
	for _, pod := range logPods {
		for _, container := range pod.Spec.Containers {
			logs, err := getPodLogsInternal(c, namespace, pod.Name, container.Name, false, nil, &tailLen)
			if err != nil {
				e2elog.Logf("Unable to fetch %s/%s/%s logs: %v", pod.Namespace, pod.Name, container.Name, err)
				continue
			}

			logDir := filepath.Join(reportDir, namespace, pod.Name, container.Name)
			if err := os.MkdirAll(logDir, 0755); err != nil {
				e2elog.Logf("Unable to create path '%s'. Err: %v", logDir, err)
				continue
			}

			logPath := filepath.Join(logDir, "logs.txt")
			if err := os.WriteFile(logPath, []byte(logs), 0644); err != nil {
				e2elog.Logf("Could not write the container logs in: %s. Err: %v", logPath, err)
			}
		}
	}
}

// DumpAllPodInfoForNamespace logs all pod information for a given namespace.
// It lists the pods in the namespace and hands the resulting items to LogPodStates,
// logPodTerminationMessages and logPodLogs (the latter also receives reportDir).
// A failure to list the pods is logged rather than returned.
func DumpAllPodInfoForNamespace(c clientset.Interface, namespace string) {
func DumpAllPodInfoForNamespace(c clientset.Interface, namespace, reportDir string) {
pods, err := c.CoreV1().Pods(namespace).List(context.TODO(), metav1.ListOptions{})
if err != nil {
// NOTE(review): execution continues after a list error and pods.Items is read below;
// confirm List never returns a nil list together with an error.
e2elog.Logf("unable to fetch pod debug info: %v", err)
}
LogPodStates(pods.Items)
logPodTerminationMessages(pods.Items)
logPodLogs(c, namespace, pods.Items, reportDir)
}

// FilterNonRestartablePods filters out pods that will never get recreated if
Expand Down Expand Up @@ -561,23 +622,23 @@ func checkPodsCondition(c clientset.Interface, ns string, podNames []string, tim

// GetPodLogs returns the logs of the specified container (namespace/pod/container).
// It delegates to getPodLogsInternal with previous=false and no sinceTime or tailLines
// restriction, and returns that call's result unchanged.
func GetPodLogs(c clientset.Interface, namespace, podName, containerName string) (string, error) {
return getPodLogsInternal(c, namespace, podName, containerName, false, nil)
return getPodLogsInternal(c, namespace, podName, containerName, false, nil, nil)
}

// GetPodLogsSince returns the logs of the specified container (namespace/pod/container) since a timestamp.
// The timestamp is wrapped in a metav1.Time and passed to getPodLogsInternal as sinceTime,
// with previous=false and no tailLines restriction.
func GetPodLogsSince(c clientset.Interface, namespace, podName, containerName string, since time.Time) (string, error) {
sinceTime := metav1.NewTime(since)
return getPodLogsInternal(c, namespace, podName, containerName, false, &sinceTime)
return getPodLogsInternal(c, namespace, podName, containerName, false, &sinceTime, nil)
}

// GetPreviousPodLogs returns the logs of the previous instance of the
// specified container (namespace/pod/container).
// It delegates to getPodLogsInternal with previous=true and no sinceTime or tailLines
// restriction.
func GetPreviousPodLogs(c clientset.Interface, namespace, podName, containerName string) (string, error) {
return getPodLogsInternal(c, namespace, podName, containerName, true, nil)
return getPodLogsInternal(c, namespace, podName, containerName, true, nil, nil)
}

// utility function for gomega Eventually
func getPodLogsInternal(c clientset.Interface, namespace, podName, containerName string, previous bool, sinceTime *metav1.Time) (string, error) {
func getPodLogsInternal(c clientset.Interface, namespace, podName, containerName string, previous bool, sinceTime *metav1.Time, tailLines *int) (string, error) {
request := c.CoreV1().RESTClient().Get().
Resource("pods").
Namespace(namespace).
Expand All @@ -587,6 +648,9 @@ func getPodLogsInternal(c clientset.Interface, namespace, podName, containerName
if sinceTime != nil {
request.Param("sinceTime", sinceTime.Format(time.RFC3339))
}
if tailLines != nil {
request.Param("tailLines", strconv.Itoa(*tailLines))
}
logs, err := request.Do(context.TODO()).Raw()
if err != nil {
return "", err
Expand Down
2 changes: 1 addition & 1 deletion test/e2e/framework/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -899,7 +899,7 @@ func DumpAllNamespaceInfo(c clientset.Interface, namespace string) {
return c.CoreV1().Events(ns).List(context.TODO(), opts)
}, namespace)

e2epod.DumpAllPodInfoForNamespace(c, namespace)
e2epod.DumpAllPodInfoForNamespace(c, namespace, TestContext.ReportDir)

// If cluster is large, then the following logs are basically useless, because:
// 1. it takes tens of minutes or hours to grab all of them
Expand Down

0 comments on commit 51b94de

Please sign in to comment.