Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dockershim: remove corrupt checkpoints immediately upon detection #55641

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 0 additions & 1 deletion hack/.golint_failures
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,6 @@ pkg/kubelet/container/testing
pkg/kubelet/custommetrics
pkg/kubelet/dockershim
pkg/kubelet/dockershim/cm
pkg/kubelet/dockershim/errors
pkg/kubelet/dockershim/libdocker
pkg/kubelet/dockershim/remote
pkg/kubelet/dockershim/testing
Expand Down
2 changes: 0 additions & 2 deletions pkg/kubelet/dockershim/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ go_library(
"//pkg/kubelet/cm:go_default_library",
"//pkg/kubelet/container:go_default_library",
"//pkg/kubelet/dockershim/cm:go_default_library",
"//pkg/kubelet/dockershim/errors:go_default_library",
"//pkg/kubelet/dockershim/libdocker:go_default_library",
"//pkg/kubelet/dockershim/metrics:go_default_library",
"//pkg/kubelet/leaky:go_default_library",
Expand Down Expand Up @@ -142,7 +141,6 @@ filegroup(
srcs = [
":package-srcs",
"//pkg/kubelet/dockershim/cm:all-srcs",
"//pkg/kubelet/dockershim/errors:all-srcs",
"//pkg/kubelet/dockershim/libdocker:all-srcs",
"//pkg/kubelet/dockershim/metrics:all-srcs",
"//pkg/kubelet/dockershim/remote:all-srcs",
Expand Down
11 changes: 6 additions & 5 deletions pkg/kubelet/dockershim/docker_checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import (
"path/filepath"

"github.com/golang/glog"
"k8s.io/kubernetes/pkg/kubelet/dockershim/errors"
utilstore "k8s.io/kubernetes/pkg/kubelet/util/store"
utilfs "k8s.io/kubernetes/pkg/util/filesystem"
hashutil "k8s.io/kubernetes/pkg/util/hash"
Expand Down Expand Up @@ -113,12 +112,14 @@ func (handler *PersistentCheckpointHandler) GetCheckpoint(podSandboxID string) (
//TODO: unmarhsal into a struct with just Version, check version, unmarshal into versioned type.
err = json.Unmarshal(blob, &checkpoint)
if err != nil {
glog.Errorf("Failed to unmarshal checkpoint %q. Checkpoint content: %q. ErrMsg: %v", podSandboxID, string(blob), err)
return &checkpoint, errors.CorruptCheckpointError
glog.Errorf("Failed to unmarshal checkpoint %q, removing checkpoint. Checkpoint content: %q. ErrMsg: %v", podSandboxID, string(blob), err)
handler.RemoveCheckpoint(podSandboxID)
return nil, fmt.Errorf("failed to unmarshal checkpoint")
}
if checkpoint.CheckSum != calculateChecksum(checkpoint) {
glog.Errorf("Checksum of checkpoint %q is not valid", podSandboxID)
return &checkpoint, errors.CorruptCheckpointError
glog.Errorf("Checksum of checkpoint %q is not valid, removing checkpoint", podSandboxID)
handler.RemoveCheckpoint(podSandboxID)
return nil, fmt.Errorf("checkpoint is corrupted")
}
return &checkpoint, nil
}
Expand Down
20 changes: 1 addition & 19 deletions pkg/kubelet/dockershim/docker_sandbox.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ import (
utilerrors "k8s.io/apimachinery/pkg/util/errors"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/dockershim/errors"
"k8s.io/kubernetes/pkg/kubelet/dockershim/libdocker"
"k8s.io/kubernetes/pkg/kubelet/qos"
"k8s.io/kubernetes/pkg/kubelet/types"
Expand Down Expand Up @@ -193,20 +192,10 @@ func (ds *dockerService) StopPodSandbox(podSandboxID string) error {
// actions will only have sandbox ID and not have pod namespace and name information.
// Return error if encounter any unexpected error.
if checkpointErr != nil {
if libdocker.IsContainerNotFoundError(statusErr) && checkpointErr == errors.CheckpointNotFoundError {
if libdocker.IsContainerNotFoundError(statusErr) {
glog.Warningf("Both sandbox container and checkpoint for id %q could not be found. "+
"Proceed without further sandbox information.", podSandboxID)
} else {
if checkpointErr == errors.CorruptCheckpointError {
// Remove the corrupted checkpoint so that the next
// StopPodSandbox call can proceed. This may indicate that
// some resources won't be reclaimed.
// TODO (#43021): Fix this properly.
glog.Warningf("Removing corrupted checkpoint %q: %+v", podSandboxID, *checkpoint)
if err := ds.checkpointHandler.RemoveCheckpoint(podSandboxID); err != nil {
glog.Warningf("Unable to remove corrupted checkpoint %q: %v", podSandboxID, err)
}
}
return utilerrors.NewAggregate([]error{
fmt.Errorf("failed to get checkpoint for sandbox %q: %v", podSandboxID, checkpointErr),
fmt.Errorf("failed to get sandbox status: %v", statusErr)})
Expand Down Expand Up @@ -488,13 +477,6 @@ func (ds *dockerService) ListPodSandbox(filter *runtimeapi.PodSandboxFilter) ([]
checkpoint, err := ds.checkpointHandler.GetCheckpoint(id)
if err != nil {
glog.Errorf("Failed to retrieve checkpoint for sandbox %q: %v", id, err)

if err == errors.CorruptCheckpointError {
glog.Warningf("Removing corrupted checkpoint %q: %+v", id, *checkpoint)
if err := ds.checkpointHandler.RemoveCheckpoint(id); err != nil {
glog.Warningf("Unable to remove corrupted checkpoint %q: %v", id, err)
}
}
continue
}
result = append(result, checkpointToRuntimeAPISandbox(id, checkpoint))
Expand Down
5 changes: 0 additions & 5 deletions pkg/kubelet/dockershim/docker_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ import (
kubecm "k8s.io/kubernetes/pkg/kubelet/cm"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/dockershim/cm"
"k8s.io/kubernetes/pkg/kubelet/dockershim/errors"
"k8s.io/kubernetes/pkg/kubelet/network"
"k8s.io/kubernetes/pkg/kubelet/network/cni"
"k8s.io/kubernetes/pkg/kubelet/network/hostport"
Expand Down Expand Up @@ -366,10 +365,6 @@ func (ds *dockerService) GetPodPortMappings(podSandboxID string) ([]*hostport.Po
checkpoint, err := ds.checkpointHandler.GetCheckpoint(podSandboxID)
// Return empty portMappings if checkpoint is not found
if err != nil {
if err == errors.CheckpointNotFoundError {
glog.Warningf("Failed to retrieve checkpoint for sandbox %q: %v", podSandboxID, err)
return nil, nil
}
return nil, err
}

Expand Down
25 changes: 0 additions & 25 deletions pkg/kubelet/dockershim/errors/BUILD

This file was deleted.

22 changes: 0 additions & 22 deletions pkg/kubelet/dockershim/errors/errors.go

This file was deleted.

1 change: 0 additions & 1 deletion pkg/kubelet/dockershim/testing/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ go_library(
name = "go_default_library",
srcs = ["util.go"],
importpath = "k8s.io/kubernetes/pkg/kubelet/dockershim/testing",
deps = ["//pkg/kubelet/dockershim/errors:go_default_library"],
)

filegroup(
Expand Down
5 changes: 2 additions & 3 deletions pkg/kubelet/dockershim/testing/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@ limitations under the License.
package testing

import (
"fmt"
"sync"

"k8s.io/kubernetes/pkg/kubelet/dockershim/errors"
)

// MemStore is an implementation of CheckpointStore interface which stores checkpoint in memory.
Expand All @@ -44,7 +43,7 @@ func (mstore *MemStore) Read(key string) ([]byte, error) {
defer mstore.Unlock()
data, ok := mstore.mem[key]
if !ok {
return nil, errors.CheckpointNotFoundError
return nil, fmt.Errorf("checkpoint is not found")
}
return data, nil
}
Expand Down