Skip to content

Commit

Permalink
Merge pull request kubernetes#123215 from adrianreber/2024-02-09-fore…
Browse files Browse the repository at this point in the history
…nsic-container-checkpointing-beta

Switch 'ContainerCheckpoint' from Alpha to Beta
  • Loading branch information
k8s-ci-robot committed Mar 1, 2024
2 parents 138f99b + da8ffcd commit cde4788
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 13 deletions.
3 changes: 2 additions & 1 deletion pkg/features/kube_features.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ const (
// owner: @adrianreber
// kep: https://kep.k8s.io/2008
// alpha: v1.25
// beta: v1.30
//
// Enables container Checkpoint support in the kubelet
ContainerCheckpoint featuregate.Feature = "ContainerCheckpoint"
Expand Down Expand Up @@ -975,7 +976,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS

CloudControllerManagerWebhook: {Default: false, PreRelease: featuregate.Alpha},

ContainerCheckpoint: {Default: false, PreRelease: featuregate.Alpha},
ContainerCheckpoint: {Default: true, PreRelease: featuregate.Beta},

ConsistentHTTPGetHandlers: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.31

Expand Down
2 changes: 2 additions & 0 deletions pkg/kubelet/server/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ func (n nodeAuthorizerAttributesGetter) GetRequestAttributes(u user.Info, r *htt
case isSubpath(requestPath, logsPath):
// "log" to match other log subresources (pods/log, etc)
attrs.Subresource = "log"
case isSubpath(requestPath, checkpointPath):
attrs.Subresource = "checkpoint"
}

klog.V(5).InfoS("Node request attributes", "user", attrs.GetUser().GetName(), "verb", attrs.GetVerb(), "resource", attrs.GetResource(), "subresource", attrs.GetSubresource())
Expand Down
2 changes: 1 addition & 1 deletion pkg/kubelet/server/auth_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ func AuthzTestCases() []AuthzTestCase {
testPaths := map[string]string{
"/attach/{podNamespace}/{podID}/{containerName}": "proxy",
"/attach/{podNamespace}/{podID}/{uid}/{containerName}": "proxy",
"/checkpoint/{podNamespace}/{podID}/{containerName}": "proxy",
"/checkpoint/{podNamespace}/{podID}/{containerName}": "checkpoint",
"/configz": "proxy",
"/containerLogs/{podNamespace}/{podID}/{containerName}": "proxy",
"/debug/flags/v": "proxy",
Expand Down
3 changes: 2 additions & 1 deletion pkg/kubelet/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ const (
proberMetricsPath = "/metrics/probes"
statsPath = "/stats/"
logsPath = "/logs/"
checkpointPath = "/checkpoint/"
pprofBasePath = "/debug/pprof/"
debugFlagPath = "/debug/flags/v"
)
Expand Down Expand Up @@ -441,7 +442,7 @@ func (s *Server) InstallDefaultHandlers() {
if utilfeature.DefaultFeatureGate.Enabled(features.ContainerCheckpoint) {
s.addMetricsBucketMatcher("checkpoint")
ws = &restful.WebService{}
ws.Path("/checkpoint").Produces(restful.MIME_JSON)
ws.Path(checkpointPath).Produces(restful.MIME_JSON)
ws.Route(ws.POST("/{podNamespace}/{podID}/{containerName}").
To(s.checkpoint).
Operation("checkpoint"))
Expand Down
35 changes: 27 additions & 8 deletions pkg/kubelet/server/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -858,18 +858,24 @@ func TestContainerLogsWithInvalidTail(t *testing.T) {
}

func TestCheckpointContainer(t *testing.T) {
// Enable features.ContainerCheckpoint during test
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ContainerCheckpoint, true)()

fw := newServerTest()
defer fw.testHTTPServer.Close()
podNamespace := "other"
podName := "foo"
expectedContainerName := "baz"
// GetPodByName() should always fail
fw.fakeKubelet.podByNameFunc = func(namespace, name string) (*v1.Pod, bool) {
return nil, false

setupTest := func(featureGate bool) *serverTestFramework {
// Set features.ContainerCheckpoint to featureGate while the test server is being set up
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ContainerCheckpoint, featureGate)()

fw := newServerTest()
// GetPodByName() should always fail
fw.fakeKubelet.podByNameFunc = func(namespace, name string) (*v1.Pod, bool) {
return nil, false
}
return fw
}
fw := setupTest(true)
defer fw.testHTTPServer.Close()

t.Run("wrong pod namespace", func(t *testing.T) {
resp, err := http.Post(fw.testHTTPServer.URL+"/checkpoint/"+podNamespace+"/"+podName+"/"+expectedContainerName, "", nil)
if err != nil {
Expand Down Expand Up @@ -927,6 +933,19 @@ func TestCheckpointContainer(t *testing.T) {
}
assert.Equal(t, resp.StatusCode, 200)
})

// Now test for 404 if checkpointing support is explicitly disabled.
fw.testHTTPServer.Close()
fw = setupTest(false)
defer fw.testHTTPServer.Close()
setPodByNameFunc(fw, podNamespace, podName, expectedContainerName)
t.Run("checkpointing fails because disabled", func(t *testing.T) {
resp, err := http.Post(fw.testHTTPServer.URL+"/checkpoint/"+podNamespace+"/"+podName+"/"+expectedContainerName, "", nil)
if err != nil {
t.Errorf("Got error POSTing: %v", err)
}
assert.Equal(t, 404, resp.StatusCode)
})
}

func makeReq(t *testing.T, method, url, clientProtocol string) *http.Request {
Expand Down
103 changes: 101 additions & 2 deletions test/e2e_node/checkpoint_container.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,14 @@ import (
clientset "k8s.io/client-go/kubernetes"
restclient "k8s.io/client-go/rest"
"k8s.io/kubernetes/test/e2e/framework"
e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
"k8s.io/kubernetes/test/e2e/nodefeature"
testutils "k8s.io/kubernetes/test/utils"
imageutils "k8s.io/kubernetes/test/utils/image"
admissionapi "k8s.io/pod-security-admission/api"

"github.com/onsi/gomega"
)

const (
Expand Down Expand Up @@ -75,14 +78,69 @@ func proxyPostRequest(ctx context.Context, c clientset.Interface, node, endpoint
}
}

// getCheckpointContainerMetric fetches the kubelet metrics of the node the
// given pod is scheduled on and returns the value of the first
// "runtime_operations_total" sample whose "__name__" label is
// "kubelet_runtime_operations_total" and whose "operation_type" label is
// "checkpoint_container".
//
// It returns 0 and a nil error when no such sample exists (the counter has
// probably not been written to yet), and 0 together with the error when the
// kubelet metrics cannot be retrieved.
func getCheckpointContainerMetric(ctx context.Context, f *framework.Framework, pod *v1.Pod) (int, error) {
	const (
		metricName    = "kubelet_runtime_operations_total"
		operationType = "checkpoint_container"
	)

	nodeName := pod.Spec.NodeName
	framework.Logf("Getting 'checkpoint_container' metrics from %q", nodeName)

	ms, err := e2emetrics.GetKubeletMetrics(ctx, f.ClientSet, nodeName)
	if err != nil {
		return 0, err
	}

	// A missing key yields an empty sample list, so the loop body never runs
	// and the "not written yet" fallback below is used.
	for _, sample := range ms["runtime_operations_total"] {
		if sample.Metric["__name__"] != metricName {
			continue
		}
		if sample.Metric["operation_type"] != operationType {
			continue
		}
		return int(sample.Value), nil
	}

	// If the metric was not found it was probably not written to, yet.
	return 0, nil
}

// getCheckpointContainerErrorMetric fetches the kubelet metrics of the node the
// given pod is scheduled on and returns the value of the first
// "runtime_operations_errors_total" sample whose "__name__" label is
// "kubelet_runtime_operations_errors_total" and whose "operation_type" label
// is "checkpoint_container".
//
// It returns 0 and a nil error when no such sample exists (the counter has
// probably not been written to yet), and 0 together with the error when the
// kubelet metrics cannot be retrieved.
func getCheckpointContainerErrorMetric(ctx context.Context, f *framework.Framework, pod *v1.Pod) (int, error) {
	const (
		metricName    = "kubelet_runtime_operations_errors_total"
		operationType = "checkpoint_container"
	)

	nodeName := pod.Spec.NodeName
	framework.Logf("Getting 'checkpoint_container' error metrics from %q", nodeName)

	ms, err := e2emetrics.GetKubeletMetrics(ctx, f.ClientSet, nodeName)
	if err != nil {
		return 0, err
	}

	// A missing key yields an empty sample list, so the loop body never runs
	// and the "not written yet" fallback below is used.
	for _, sample := range ms["runtime_operations_errors_total"] {
		if sample.Metric["__name__"] != metricName {
			continue
		}
		if sample.Metric["operation_type"] != operationType {
			continue
		}
		return int(sample.Value), nil
	}

	// If the metric was not found it was probably not written to, yet.
	return 0, nil
}

var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, func() {
f := framework.NewDefaultFramework("checkpoint-container-test")
f.NamespacePodSecurityLevel = admissionapi.LevelBaseline
ginkgo.It("will checkpoint a container out of a pod", func(ctx context.Context) {
ginkgo.By("creating a target pod")
podClient := e2epod.NewPodClient(f)
pod := podClient.CreateSync(ctx, &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "checkpoint-container-pod"},
ObjectMeta: metav1.ObjectMeta{
Name: "checkpoint-container-pod",
Namespace: f.Namespace.Name,
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Expand All @@ -108,6 +166,15 @@ var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, fun
framework.Failf("pod %q should be ready", p.Name)
}

// No checkpoint operation should have been logged
checkpointContainerMetric, err := getCheckpointContainerMetric(ctx, f, pod)
framework.ExpectNoError(err)
gomega.Expect(checkpointContainerMetric).To(gomega.Equal(0))
// No error should have been logged
checkpointContainerErrorMetric, err := getCheckpointContainerErrorMetric(ctx, f, pod)
framework.ExpectNoError(err)
gomega.Expect(checkpointContainerErrorMetric).To(gomega.Equal(0))

framework.Logf(
"About to checkpoint container %q on %q",
pod.Spec.Containers[0].Name,
Expand Down Expand Up @@ -144,6 +211,12 @@ var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, fun
// If the container engine has not implemented the Checkpoint CRI API
// we will get 500 and a message with
// '(rpc error: code = Unimplemented desc = unknown method CheckpointContainer'
// or
// '(rpc error: code = Unimplemented desc = method CheckpointContainer not implemented)'
// if the container engine reports that it has explicitly disabled support for it,
// or
// '(rpc error: code = Unknown desc = checkpoint/restore support not available)'
// if the container engine explicitly disabled the checkpoint/restore support
if (int(statusError.ErrStatus.Code)) == http.StatusInternalServerError {
if strings.Contains(
statusError.ErrStatus.Message,
Expand All @@ -152,8 +225,26 @@ var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, fun
ginkgo.Skip("Container engine does not implement 'CheckpointContainer'")
return
}
if strings.Contains(
statusError.ErrStatus.Message,
"(rpc error: code = Unimplemented desc = method CheckpointContainer not implemented)",
) {
ginkgo.Skip("Container engine does not implement 'CheckpointContainer'")
return
}
if strings.Contains(
statusError.ErrStatus.Message,
"(rpc error: code = Unknown desc = checkpoint/restore support not available)",
) {
ginkgo.Skip("Container engine does not implement 'CheckpointContainer'")
return
}
}
framework.Failf("Unexpected status code (%d) during 'CheckpointContainer'", statusError.ErrStatus.Code)
framework.Failf(
"Unexpected status code (%d) during 'CheckpointContainer': %q",
statusError.ErrStatus.Code,
statusError.ErrStatus.Message,
)
}

framework.ExpectNoError(err)
Expand Down Expand Up @@ -205,5 +296,13 @@ var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, fun
// cleanup checkpoint archive
os.RemoveAll(item)
}
// Exactly one checkpoint operation should have happened
checkpointContainerMetric, err = getCheckpointContainerMetric(ctx, f, pod)
framework.ExpectNoError(err)
gomega.Expect(checkpointContainerMetric).To(gomega.Equal(1))
// No error should have been logged
checkpointContainerErrorMetric, err = getCheckpointContainerErrorMetric(ctx, f, pod)
framework.ExpectNoError(err)
gomega.Expect(checkpointContainerErrorMetric).To(gomega.Equal(0))
})
})

0 comments on commit cde4788

Please sign in to comment.