Skip to content

Commit

Permalink
implementation of split disk kep
Browse files Browse the repository at this point in the history
  • Loading branch information
kannon92 committed Nov 1, 2023
1 parent 9747358 commit 26923b9
Show file tree
Hide file tree
Showing 39 changed files with 3,533 additions and 764 deletions.
8 changes: 8 additions & 0 deletions pkg/features/kube_features.go
Expand Up @@ -454,6 +454,12 @@ const (
// Enable POD resources API to return allocatable resources
KubeletPodResourcesGetAllocatable featuregate.Feature = "KubeletPodResourcesGetAllocatable"

// KubeletSeparateDiskGC enables Kubelet to garbage collect images/containers on different filesystems
// owner: @kannon92
// kep: https://kep.k8s.io/4191
// alpha: v1.29
KubeletSeparateDiskGC featuregate.Feature = "KubeletSeparateDiskGC"

// owner: @sallyom
// kep: https://kep.k8s.io/2832
// alpha: v1.25
Expand Down Expand Up @@ -1088,6 +1094,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS

KubeletPodResourcesGetAllocatable: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // GA in 1.28, remove in 1.30

KubeletSeparateDiskGC: {Default: false, PreRelease: featuregate.Alpha},

KubeletTracing: {Default: true, PreRelease: featuregate.Beta},

KubeProxyDrainingTerminatingNodes: {Default: false, PreRelease: featuregate.Alpha},
Expand Down
8 changes: 8 additions & 0 deletions pkg/kubelet/cadvisor/cadvisor_linux.go
Expand Up @@ -186,6 +186,14 @@ func (cc *cadvisorClient) getFsInfo(label string) (cadvisorapiv2.FsInfo, error)
return res[0], nil
}

// ContainerFsInfo returns usage information about the filesystem identified by
// the container filesystem label of the configured runtime. If the label
// cannot be determined, the lookup error is returned with an empty FsInfo.
func (cc *cadvisorClient) ContainerFsInfo() (cadvisorapiv2.FsInfo, error) {
	fsLabel, labelErr := cc.imageFsInfoProvider.ContainerFsInfoLabel()
	if labelErr != nil {
		var none cadvisorapiv2.FsInfo
		return none, labelErr
	}
	return cc.getFsInfo(fsLabel)
}

// WatchEvents hands the request to WatchForEvents and returns its result unchanged.
func (cc *cadvisorClient) WatchEvents(request *events.Request) (*events.EventChannel, error) {
	eventChannel, err := cc.WatchForEvents(request)
	return eventChannel, err
}
29 changes: 29 additions & 0 deletions pkg/kubelet/cadvisor/cadvisor_linux_test.go
Expand Up @@ -57,3 +57,32 @@ func TestImageFsInfoLabel(t *testing.T) {
})
}
}

// TestContainerFsInfoLabel checks the label and error returned by
// ContainerFsInfoLabel for a CRI-O runtime endpoint and for an empty endpoint.
func TestContainerFsInfoLabel(t *testing.T) {
	testcases := []struct {
		description     string
		runtimeEndpoint string
		expectedLabel   string
		expectedError   error
	}{{
		// The description names the label that is actually asserted below.
		description:     "LabelCrioContainers should be returned",
		runtimeEndpoint: crio.CrioSocket,
		expectedLabel:   LabelCrioContainers,
		expectedError:   nil,
	}, {
		description:     "Cannot find valid containerfs label",
		runtimeEndpoint: "",
		expectedLabel:   "",
		expectedError:   fmt.Errorf("no containerfs label for configured runtime"),
	}}

	for _, tc := range testcases {
		t.Run(tc.description, func(t *testing.T) {
			infoProvider := NewImageFsInfoProvider(tc.runtimeEndpoint)
			label, err := infoProvider.ContainerFsInfoLabel()
			assert.Equal(t, tc.expectedLabel, label)
			assert.Equal(t, tc.expectedError, err)
		})
	}
}
4 changes: 4 additions & 0 deletions pkg/kubelet/cadvisor/cadvisor_unsupported.go
Expand Up @@ -79,6 +79,10 @@ func (cu *cadvisorUnsupported) RootFsInfo() (cadvisorapiv2.FsInfo, error) {
return cadvisorapiv2.FsInfo{}, errUnsupported
}

// ContainerFsInfo always returns an empty FsInfo together with errUnsupported.
func (cu *cadvisorUnsupported) ContainerFsInfo() (cadvisorapiv2.FsInfo, error) {
	var empty cadvisorapiv2.FsInfo
	return empty, errUnsupported
}

// WatchEvents always returns a nil event channel together with errUnsupported.
func (cu *cadvisorUnsupported) WatchEvents(request *events.Request) (*events.EventChannel, error) {
	var noChannel *events.EventChannel
	return noChannel, errUnsupported
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/kubelet/cadvisor/cadvisor_windows.go
Expand Up @@ -79,6 +79,10 @@ func (cu *cadvisorClient) ImagesFsInfo() (cadvisorapiv2.FsInfo, error) {
return cadvisorapiv2.FsInfo{}, nil
}

// ContainerFsInfo always returns an empty FsInfo and a nil error.
func (cu *cadvisorClient) ContainerFsInfo() (cadvisorapiv2.FsInfo, error) {
	var empty cadvisorapiv2.FsInfo
	return empty, nil
}

// RootFsInfo returns the filesystem information that GetDirFsInfo reports for
// the client's rootPath.
func (cu *cadvisorClient) RootFsInfo() (cadvisorapiv2.FsInfo, error) {
	root := cu.rootPath
	return cu.GetDirFsInfo(root)
}
Expand Down
26 changes: 22 additions & 4 deletions pkg/kubelet/cadvisor/helpers_linux.go
Expand Up @@ -26,6 +26,11 @@ import (
cadvisorfs "github.com/google/cadvisor/fs"
)

// LabelCrioContainers is a label to allow for cAdvisor to track writeable layers
// separately from read-only layers.
// TODO: remove this constant once the upstream cAdvisor changes are merged.
const LabelCrioContainers string = "crio-containers"

// imageFsInfoProvider knows how to translate the configured runtime
// to its file system label for images.
type imageFsInfoProvider struct {
Expand All @@ -35,15 +40,28 @@ type imageFsInfoProvider struct {
// ImageFsInfoLabel returns the image fs label for the configured runtime.
// For remote runtimes, it handles additional runtimes natively understood by cAdvisor.
func (i *imageFsInfoProvider) ImageFsInfoLabel() (string, error) {
// This is a temporary workaround to get stats for cri-o from cadvisor
// and should be removed.
// Related to https://github.com/kubernetes/kubernetes/issues/51798
if strings.HasSuffix(i.runtimeEndpoint, CrioSocketSuffix) {
if detectCrioWorkaround(i) {
return cadvisorfs.LabelCrioImages, nil
}
return "", fmt.Errorf("no imagefs label for configured runtime")
}

// ContainerFsInfoLabel returns the container fs label for the configured runtime.
// For remote runtimes, it handles additional runtimes natively understood by cAdvisor.
//
// LabelCrioContainers is returned when detectCrioWorkaround matches the
// provider; every other runtime yields an empty label and an error.
func (i *imageFsInfoProvider) ContainerFsInfoLabel() (string, error) {
	if !detectCrioWorkaround(i) {
		return "", fmt.Errorf("no containerfs label for configured runtime")
	}
	return LabelCrioContainers, nil
}

// detectCrioWorkaround reports whether the provider's runtime endpoint ends
// with CrioSocketSuffix.
//
// This is a temporary workaround to get stats for cri-o from cadvisor
// and should be removed.
// Related to https://github.com/kubernetes/kubernetes/issues/51798
func detectCrioWorkaround(i *imageFsInfoProvider) bool {
	endpoint := i.runtimeEndpoint
	return strings.HasSuffix(endpoint, CrioSocketSuffix)
}

// NewImageFsInfoProvider returns a provider for the specified runtime configuration.
func NewImageFsInfoProvider(runtimeEndpoint string) ImageFsInfoProvider {
return &imageFsInfoProvider{runtimeEndpoint: runtimeEndpoint}
Expand Down
4 changes: 4 additions & 0 deletions pkg/kubelet/cadvisor/helpers_unsupported.go
Expand Up @@ -29,6 +29,10 @@ func (i *unsupportedImageFsInfoProvider) ImageFsInfoLabel() (string, error) {
return "", errors.New("unsupported")
}

// ContainerFsInfoLabel always returns an empty label and an "unsupported" error.
func (i *unsupportedImageFsInfoProvider) ContainerFsInfoLabel() (string, error) {
	const noLabel = ""
	return noLabel, errors.New("unsupported")
}

// NewImageFsInfoProvider returns a provider for the specified runtime configuration.
func NewImageFsInfoProvider(runtimeEndpoint string) ImageFsInfoProvider {
return &unsupportedImageFsInfoProvider{}
Expand Down
5 changes: 5 additions & 0 deletions pkg/kubelet/cadvisor/testing/cadvisor_fake.go
Expand Up @@ -101,6 +101,11 @@ func (c *Fake) RootFsInfo() (cadvisorapiv2.FsInfo, error) {
return cadvisorapiv2.FsInfo{}, nil
}

// ContainerFsInfo is a fake implementation of Interface.ContainerFsInfo.
// It always reports an empty FsInfo and no error.
func (c *Fake) ContainerFsInfo() (cadvisorapiv2.FsInfo, error) {
	var empty cadvisorapiv2.FsInfo
	return empty, nil
}

// WatchEvents is a fake implementation of Interface.WatchEvents.
func (c *Fake) WatchEvents(request *events.Request) (*events.EventChannel, error) {
return new(events.EventChannel), nil
Expand Down
30 changes: 30 additions & 0 deletions pkg/kubelet/cadvisor/testing/cadvisor_mock.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions pkg/kubelet/cadvisor/types.go
Expand Up @@ -41,6 +41,10 @@ type Interface interface {
// Returns usage information about the root filesystem.
RootFsInfo() (cadvisorapiv2.FsInfo, error)

// Returns usage information about the filesystem holding the containers' writeable layer.
// With KEP 4191 this filesystem can be separate from the image filesystem.
ContainerFsInfo() (cadvisorapiv2.FsInfo, error)

// Get events streamed through passedChannel that fit the request.
WatchEvents(request *events.Request) (*events.EventChannel, error)

Expand All @@ -52,4 +56,6 @@ type Interface interface {
type ImageFsInfoProvider interface {
// ImageFsInfoLabel returns the label cAdvisor should use to find the filesystem holding container images.
ImageFsInfoLabel() (string, error)
// ContainerFsInfoLabel returns the label cAdvisor should use to find the filesystem holding
// the containers' writeable layers.
// In split image filesystem this will be different from ImageFsInfoLabel
ContainerFsInfoLabel() (string, error)
}
21 changes: 21 additions & 0 deletions pkg/kubelet/container/container_gc.go
Expand Up @@ -45,6 +45,8 @@ type GC interface {
GarbageCollect(ctx context.Context) error
// Deletes all unused containers, including containers belonging to pods that are terminated but not deleted
DeleteAllUnusedContainers(ctx context.Context) error
// IsContainerFsSeparateFromImageFs tells if writeable layer and read-only layer are separate.
IsContainerFsSeparateFromImageFs(ctx context.Context) bool
}

// SourcesReadyProvider knows how to determine if configuration sources are ready
Expand Down Expand Up @@ -86,3 +88,22 @@ func (cgc *realContainerGC) DeleteAllUnusedContainers(ctx context.Context) error
klog.InfoS("Attempting to delete unused containers")
return cgc.runtime.GarbageCollect(ctx, cgc.policy, cgc.sourcesReadyProvider.AllReady(), true)
}

// IsContainerFsSeparateFromImageFs reports whether the runtime places the
// container (writeable layer) filesystem on a different mountpoint than the
// image (read-only layer) filesystem.
//
// It returns false whenever this cannot be established: the runtime call
// fails, the response is missing, either filesystem list is empty, or the
// first entry of either list carries no filesystem identifier.
func (cgc *realContainerGC) IsContainerFsSeparateFromImageFs(ctx context.Context) bool {
	resp, err := cgc.runtime.ImageFsInfo(ctx)
	// A nil response with a nil error is treated like a failed call instead of
	// being dereferenced below.
	if err != nil || resp == nil {
		return false
	}
	// These fields can be empty if the CRI implementation didn't populate them.
	// len() of a nil slice is 0, so no separate nil check is needed.
	if len(resp.ContainerFilesystems) == 0 || len(resp.ImageFilesystems) == 0 {
		return false
	}
	// KEP 4191 explains that multiple filesystems for images and containers are
	// not supported at the moment, so only the first entry of each list is compared.
	// See https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/4191-split-image-filesystem#comment-on-future-extensions
	// for work needed to support multiple filesystems.
	containerFs, imageFs := resp.ContainerFilesystems[0], resp.ImageFilesystems[0]
	if containerFs == nil || imageFs == nil {
		return false
	}
	if containerFs.FsId == nil || imageFs.FsId == nil {
		return false
	}
	return containerFs.FsId.Mountpoint != imageFs.FsId.Mountpoint
}
96 changes: 96 additions & 0 deletions pkg/kubelet/container/container_gc_test.go
@@ -0,0 +1,96 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package container_test

import (
"context"
"reflect"
"testing"

runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
. "k8s.io/kubernetes/pkg/kubelet/container"
ctest "k8s.io/kubernetes/pkg/kubelet/container/testing"
)

// TestIsContainerFsSeparateFromImageFs feeds different combinations of image
// and container filesystem stats into the fake runtime and checks whether the
// container GC reports them as living on separate filesystems.
func TestIsContainerFsSeparateFromImageFs(t *testing.T) {
	runtime := &ctest.FakeRuntime{}
	fakeSources := ctest.NewFakeReadyProvider()

	gcContainer, err := NewContainerGC(runtime, GCPolicy{}, fakeSources)
	if err != nil {
		// Stop here: continuing with an unusable GC would only panic below.
		t.Fatalf("unexpected error creating container GC: %v", err)
	}

	cases := []struct {
		name                          string
		containerFs                   []*runtimeapi.FilesystemUsage
		imageFs                       []*runtimeapi.FilesystemUsage
		writeableSeparateFromReadOnly bool
	}{
		{
			name:                          "Only images",
			imageFs:                       []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}},
			writeableSeparateFromReadOnly: false,
		},
		{
			name:                          "images and containers",
			imageFs:                       []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}},
			containerFs:                   []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "container"}}},
			writeableSeparateFromReadOnly: true,
		},
		{
			name:                          "same filesystem",
			imageFs:                       []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}},
			containerFs:                   []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}},
			writeableSeparateFromReadOnly: false,
		},

		{
			name:                          "Only containers",
			containerFs:                   []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}},
			writeableSeparateFromReadOnly: false,
		},
		{
			name:                          "neither are specified",
			writeableSeparateFromReadOnly: false,
		},
		{
			name:                          "both are empty arrays",
			writeableSeparateFromReadOnly: false,
			containerFs:                   []*runtimeapi.FilesystemUsage{},
			imageFs:                       []*runtimeapi.FilesystemUsage{},
		},
		{
			name:                          "FsId does not exist",
			writeableSeparateFromReadOnly: false,
			containerFs:                   []*runtimeapi.FilesystemUsage{{UsedBytes: &runtimeapi.UInt64Value{Value: 10}}},
			imageFs:                       []*runtimeapi.FilesystemUsage{{UsedBytes: &runtimeapi.UInt64Value{Value: 10}}},
		},
	}

	for _, tc := range cases {
		// Each case runs as a named subtest so failures are attributed to it.
		t.Run(tc.name, func(t *testing.T) {
			runtime.SetContainerFsStats(tc.containerFs)
			runtime.SetImageFsStats(tc.imageFs)
			actualCommand := gcContainer.IsContainerFsSeparateFromImageFs(context.TODO())

			if e, a := tc.writeableSeparateFromReadOnly, actualCommand; !reflect.DeepEqual(e, a) {
				t.Errorf("%v: unexpected value; expected %v, got %v", tc.name, e, a)
			}
			// Reset the shared fake runtime so cases do not leak state into each other.
			runtime.SetContainerFsStats(nil)
			runtime.SetImageFsStats(nil)
		})
	}
}
2 changes: 2 additions & 0 deletions pkg/kubelet/container/runtime.go
Expand Up @@ -160,6 +160,8 @@ type ImageService interface {
RemoveImage(ctx context.Context, image ImageSpec) error
// ImageStats returns Image statistics.
ImageStats(ctx context.Context) (*ImageStats, error)
// ImageFsInfo returns a list of file systems for containers/images
ImageFsInfo(ctx context.Context) (*runtimeapi.ImageFsInfoResponse, error)
}

// Attacher interface allows to attach a container.
Expand Down
36 changes: 36 additions & 0 deletions pkg/kubelet/container/testing/fake_ready_provider.go
@@ -0,0 +1,36 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package testing

import (
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)

// FakeReadyProvider implements a fake ready provider.
// It embeds kubecontainer.SourcesReadyProvider and defines its own AllReady.
// NOTE(review): NewFakeReadyProvider leaves the embedded interface nil, so
// calling any method other than AllReady would presumably panic — confirm no
// test relies on them.
type FakeReadyProvider struct {
kubecontainer.SourcesReadyProvider
}

// AllReady always reports true: the fake provider is unconditionally ready.
func (frp *FakeReadyProvider) AllReady() bool {
	const ready = true
	return ready
}

// NewFakeReadyProvider returns a zero-valued FakeReadyProvider as a
// kubecontainer.SourcesReadyProvider.
func NewFakeReadyProvider() kubecontainer.SourcesReadyProvider {
	provider := new(FakeReadyProvider)
	return provider
}

0 comments on commit 26923b9

Please sign in to comment.