Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove potentially unhealthy symlink only for dead containers #89160

Merged
merged 1 commit into from Apr 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
26 changes: 26 additions & 0 deletions pkg/kubelet/kuberuntime/kuberuntime_gc.go
Expand Up @@ -356,9 +356,35 @@ func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error {
logSymlinks, _ := osInterface.Glob(filepath.Join(legacyContainerLogsDir, fmt.Sprintf("*.%s", legacyLogSuffix)))
for _, logSymlink := range logSymlinks {
if _, err := osInterface.Stat(logSymlink); os.IsNotExist(err) {
if containerID, err := getContainerIDFromLegacyLogSymlink(logSymlink); err == nil {
tedyu marked this conversation as resolved.
Show resolved Hide resolved
tedyu marked this conversation as resolved.
Show resolved Hide resolved
status, err := cgc.manager.runtimeService.ContainerStatus(containerID)
if err != nil {
// TODO: we should handle container not found (i.e. container was deleted) case differently
// once https://github.com/kubernetes/kubernetes/issues/63336 is resolved
klog.Infof("Error getting ContainerStatus for containerID %q: %v", containerID, err)
tedyu marked this conversation as resolved.
Show resolved Hide resolved
} else if status.State != runtimeapi.ContainerState_CONTAINER_EXITED {
// Here is how container log rotation works (see containerLogManager#rotateLatestLog):
//
// 1. rename current log to rotated log file whose filename contains current timestamp (fmt.Sprintf("%s.%s", log, timestamp))
// 2. reopen the container log
// 3. if #2 fails, rename rotated log file back to container log
//
// There is small but indeterministic amount of time during which log file doesn't exist (between steps #1 and #2, between #1 and #3).
// Hence the symlink may be deemed unhealthy during that period.
// See https://github.com/kubernetes/kubernetes/issues/52172
//
// We only remove unhealthy symlink for dead containers
klog.V(5).Infof("Container %q is still running, not removing symlink %q.", containerID, logSymlink)
continue
}
} else {
klog.V(4).Infof("unable to obtain container Id: %v", err)
}
err := osInterface.Remove(logSymlink)
if err != nil {
klog.Errorf("Failed to remove container log dead symlink %q: %v", logSymlink, err)
} else {
klog.V(4).Infof("removed symlink %s", logSymlink)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

%q

}
}
}
Expand Down
20 changes: 20 additions & 0 deletions pkg/kubelet/kuberuntime/legacy.go
Expand Up @@ -19,6 +19,7 @@ package kuberuntime
import (
"fmt"
"path"
"strings"

kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)
Expand All @@ -44,6 +45,25 @@ func legacyLogSymlink(containerID string, containerName, podName, podNamespace s
containerName, containerID)
}

// getContainerIDFromLegacyLogSymlink returns error if container Id cannot be parsed
func getContainerIDFromLegacyLogSymlink(logSymlink string) (string, error) {
parts := strings.Split(logSymlink, "-")
if len(parts) == 0 {
return "", fmt.Errorf("unable to find separator in %q", logSymlink)
}
containerIDWithSuffix := parts[len(parts)-1]
suffix := fmt.Sprintf(".%s", legacyLogSuffix)
if !strings.HasSuffix(containerIDWithSuffix, suffix) {
return "", fmt.Errorf("%q doesn't end with %q", logSymlink, suffix)
}
containerIDWithoutSuffix := strings.TrimSuffix(containerIDWithSuffix, suffix)
// container can be retrieved with container Id as short as 6 characters
if len(containerIDWithoutSuffix) < 6 {
tedyu marked this conversation as resolved.
Show resolved Hide resolved
return "", fmt.Errorf("container Id %q is too short", containerIDWithoutSuffix)
}
return containerIDWithoutSuffix, nil
}

func logSymlink(containerLogsDir, podFullName, containerName, containerID string) string {
suffix := fmt.Sprintf(".%s", legacyLogSuffix)
logPath := fmt.Sprintf("%s_%s-%s", podFullName, containerName, containerID)
Expand Down
2 changes: 1 addition & 1 deletion pkg/kubelet/remote/remote_runtime.go
Expand Up @@ -473,7 +473,7 @@ func (r *RemoteRuntimeService) ContainerStats(containerID string) (*runtimeapi.C
})
if err != nil {
if r.logReduction.ShouldMessageBePrinted(err.Error(), containerID) {
klog.Errorf("ContainerStatus %q from runtime service failed: %v", containerID, err)
klog.Errorf("ContainerStats %q from runtime service failed: %v", containerID, err)
}
return nil, err
}
Expand Down