Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce the number of expensive calls in the Windows stats queries for dockershim #104287

Merged
merged 1 commit into from Nov 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 2 additions & 10 deletions pkg/kubelet/dockershim/docker_image_linux.go
Expand Up @@ -25,20 +25,12 @@ import (
"path/filepath"
"time"

"k8s.io/klog/v2"

runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
)

// ImageFsInfo returns information of the filesystem that is used to store images.
func (ds *dockerService) ImageFsInfo(_ context.Context, _ *runtimeapi.ImageFsInfoRequest) (*runtimeapi.ImageFsInfoResponse, error) {
info, err := ds.client.Info()
if err != nil {
klog.ErrorS(err, "Failed to get docker info")
return nil, err
}

bytes, inodes, err := dirSize(filepath.Join(info.DockerRootDir, "image"))
bytes, inodes, err := dirSize(filepath.Join(ds.dockerRootDir, "image"))
if err != nil {
return nil, err
}
Expand All @@ -48,7 +40,7 @@ func (ds *dockerService) ImageFsInfo(_ context.Context, _ *runtimeapi.ImageFsInf
{
Timestamp: time.Now().Unix(),
FsId: &runtimeapi.FilesystemIdentifier{
Mountpoint: info.DockerRootDir,
Mountpoint: ds.dockerRootDir,
},
UsedBytes: &runtimeapi.UInt64Value{
Value: uint64(bytes),
Expand Down
12 changes: 3 additions & 9 deletions pkg/kubelet/dockershim/docker_image_windows.go
Expand Up @@ -31,16 +31,10 @@ import (

// ImageFsInfo returns information of the filesystem that is used to store images.
func (ds *dockerService) ImageFsInfo(_ context.Context, _ *runtimeapi.ImageFsInfoRequest) (*runtimeapi.ImageFsInfoResponse, error) {
info, err := ds.client.Info()
if err != nil {
klog.ErrorS(err, "Failed to get docker info")
return nil, err
}

statsClient := &winstats.StatsClient{}
fsinfo, err := statsClient.GetDirFsInfo(info.DockerRootDir)
fsinfo, err := statsClient.GetDirFsInfo(ds.dockerRootDir)
if err != nil {
klog.ErrorS(err, "Failed to get fsInfo for dockerRootDir", "path", info.DockerRootDir)
klog.ErrorS(err, "Failed to get fsInfo for dockerRootDir", "path", ds.dockerRootDir)
return nil, err
}

Expand All @@ -49,7 +43,7 @@ func (ds *dockerService) ImageFsInfo(_ context.Context, _ *runtimeapi.ImageFsInf
Timestamp: time.Now().UnixNano(),
UsedBytes: &runtimeapi.UInt64Value{Value: fsinfo.Usage},
FsId: &runtimeapi.FilesystemIdentifier{
Mountpoint: info.DockerRootDir,
Mountpoint: ds.dockerRootDir,
},
},
}
Expand Down
18 changes: 11 additions & 7 deletions pkg/kubelet/dockershim/docker_service.go
Expand Up @@ -257,16 +257,17 @@ func NewDockerService(config *ClientConfig, podSandboxImage string, streamingCon
ds.network = network.NewPluginManager(plug)
klog.InfoS("Docker cri networking managed by the network plugin", "networkPluginName", plug.Name())

dockerInfo, err := ds.client.Info()
if err != nil {
return nil, fmt.Errorf("Failed to execute Info() call to the Docker client")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is a change of behavior. If docker is slow to respond, kubelet may fail to start now? I don't think it's a critical change, just leaving this comment for other reviewers to pay attention.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agreed, I called this out here: #104287 (comment). There are other calls to the docker during this time that could cause a failure, so we should be confident that docker is running

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep, I see it now =) Sorry missed it

}
klog.InfoS("Docker Info", "dockerInfo", dockerInfo)
ds.dockerRootDir = dockerInfo.DockerRootDir

// skipping cgroup driver checks for Windows
if runtime.GOOS == "linux" {
// NOTE: cgroup driver is only detectable in docker 1.11+
cgroupDriver := defaultCgroupDriver
dockerInfo, err := ds.client.Info()
klog.InfoS("Docker Info", "dockerInfo", dockerInfo)
if err != nil {
klog.ErrorS(err, "Failed to execute Info() call to the Docker client")
klog.InfoS("Falling back to use the default driver", "cgroupDriver", cgroupDriver)
} else if len(dockerInfo.CgroupDriver) == 0 {
if len(dockerInfo.CgroupDriver) == 0 {
klog.InfoS("No cgroup driver is set in Docker")
klog.InfoS("Falling back to use the default driver", "cgroupDriver", cgroupDriver)
} else {
Expand Down Expand Up @@ -314,6 +315,9 @@ type dockerService struct {
// the docker daemon every time we need to do such checks.
versionCache *cache.ObjectCache

// docker root directory
dockerRootDir string

// containerCleanupInfos maps container IDs to the `containerCleanupInfo` structs
// needed to clean up after containers have been removed.
// (see `applyPlatformSpecificDockerConfig` and `performPlatformSpecificContainerCleanup`
Expand Down
1 change: 1 addition & 0 deletions pkg/kubelet/dockershim/docker_service_test.go
Expand Up @@ -85,6 +85,7 @@ func newTestDockerService() (*dockerService, *libdocker.FakeDockerClient, *testi
network: pm,
checkpointManager: ckm,
networkReady: make(map[string]bool),
dockerRootDir: "/docker/root/dir",
}, c, fakeClock
}

Expand Down
17 changes: 14 additions & 3 deletions pkg/kubelet/dockershim/docker_stats.go
Expand Up @@ -22,15 +22,26 @@ package dockershim
import (
"context"
"errors"
"fmt"

runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
)

var ErrNotImplemented = errors.New("Not implemented")

// ContainerStats returns stats for a container stats request based on container id.
func (ds *dockerService) ContainerStats(_ context.Context, r *runtimeapi.ContainerStatsRequest) (*runtimeapi.ContainerStatsResponse, error) {
stats, err := ds.getContainerStats(r.ContainerId)
func (ds *dockerService) ContainerStats(ctx context.Context, r *runtimeapi.ContainerStatsRequest) (*runtimeapi.ContainerStatsResponse, error) {
filter := &runtimeapi.ContainerFilter{
Id: r.ContainerId,
}
listResp, err := ds.ListContainers(ctx, &runtimeapi.ListContainersRequest{Filter: filter})
if err != nil {
return nil, err
}
if len(listResp.Containers) != 1 {
return nil, fmt.Errorf("container with id %s not found", r.ContainerId)
}
stats, err := ds.getContainerStats(listResp.Containers[0])
if err != nil {
return nil, err
}
Expand All @@ -55,7 +66,7 @@ func (ds *dockerService) ListContainerStats(ctx context.Context, r *runtimeapi.L

var stats []*runtimeapi.ContainerStats
for _, container := range listResp.Containers {
containerStats, err := ds.getContainerStats(container.Id)
containerStats, err := ds.getContainerStats(container)
if err != nil {
return nil, err
}
Expand Down
28 changes: 8 additions & 20 deletions pkg/kubelet/dockershim/docker_stats_linux.go
Expand Up @@ -20,42 +20,30 @@ limitations under the License.
package dockershim

import (
"context"
"time"

runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
)

func (ds *dockerService) getContainerStats(containerID string) (*runtimeapi.ContainerStats, error) {
info, err := ds.client.Info()
func (ds *dockerService) getContainerStats(c *runtimeapi.Container) (*runtimeapi.ContainerStats, error) {
statsJSON, err := ds.client.GetContainerStats(c.Id)
if err != nil {
return nil, err
}

statsJSON, err := ds.client.GetContainerStats(containerID)
containerJSON, err := ds.client.InspectContainerWithSize(c.Id)
if err != nil {
return nil, err
}

containerJSON, err := ds.client.InspectContainerWithSize(containerID)
if err != nil {
return nil, err
}

statusResp, err := ds.ContainerStatus(context.Background(), &runtimeapi.ContainerStatusRequest{ContainerId: containerID})
if err != nil {
return nil, err
}
status := statusResp.GetStatus()

dockerStats := statsJSON.Stats
timestamp := time.Now().UnixNano()
containerStats := &runtimeapi.ContainerStats{
Attributes: &runtimeapi.ContainerAttributes{
Id: containerID,
Metadata: status.Metadata,
Labels: status.Labels,
Annotations: status.Annotations,
Id: c.Id,
Metadata: c.Metadata,
Labels: c.Labels,
Annotations: c.Annotations,
},
Cpu: &runtimeapi.CpuUsage{
Timestamp: timestamp,
Expand All @@ -67,7 +55,7 @@ func (ds *dockerService) getContainerStats(containerID string) (*runtimeapi.Cont
},
WritableLayer: &runtimeapi.FilesystemUsage{
Timestamp: timestamp,
FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: info.DockerRootDir},
FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: ds.dockerRootDir},
UsedBytes: &runtimeapi.UInt64Value{Value: uint64(*containerJSON.SizeRw)},
},
}
Expand Down
27 changes: 20 additions & 7 deletions pkg/kubelet/dockershim/docker_stats_test.go
Expand Up @@ -23,35 +23,48 @@ import (
"testing"

dockertypes "github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/container"
"github.com/stretchr/testify/assert"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
"k8s.io/kubernetes/pkg/kubelet/dockershim/libdocker"
)

func TestContainerStats(t *testing.T) {
labels := map[string]string{containerTypeLabelKey: containerTypeLabelContainer}
tests := map[string]struct {
containerID string
container *libdocker.FakeContainer
containerStats *dockertypes.StatsJSON
calledDetails []libdocker.CalledDetail
}{
"container exists": {
"fake_container",
&libdocker.FakeContainer{ID: "fake_container"},
"k8s_fake_container",
&libdocker.FakeContainer{
ID: "k8s_fake_container",
Name: "k8s_fake_container_1_2_1",
Config: &container.Config{
Labels: labels,
},
},
&dockertypes.StatsJSON{},
[]libdocker.CalledDetail{
libdocker.NewCalledDetail("list", nil),
libdocker.NewCalledDetail("get_container_stats", nil),
libdocker.NewCalledDetail("inspect_container_withsize", nil),
libdocker.NewCalledDetail("inspect_container", nil),
libdocker.NewCalledDetail("inspect_image", nil),
},
},
"container doesn't exists": {
"nonexistant_fake_container",
&libdocker.FakeContainer{ID: "fake_container"},
"k8s_nonexistant_fake_container",
&libdocker.FakeContainer{
ID: "k8s_fake_container",
Name: "k8s_fake_container_1_2_1",
Config: &container.Config{
Labels: labels,
},
},
&dockertypes.StatsJSON{},
[]libdocker.CalledDetail{
libdocker.NewCalledDetail("get_container_stats", nil),
libdocker.NewCalledDetail("list", nil),
},
},
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/kubelet/dockershim/docker_stats_unsupported.go
Expand Up @@ -25,6 +25,6 @@ import (
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
)

func (ds *dockerService) getContainerStats(containerID string) (*runtimeapi.ContainerStats, error) {
func (ds *dockerService) getContainerStats(c *runtimeapi.Container) (*runtimeapi.ContainerStats, error) {
return nil, fmt.Errorf("not implemented")
}
42 changes: 14 additions & 28 deletions pkg/kubelet/dockershim/docker_stats_windows.go
Expand Up @@ -20,7 +20,6 @@ limitations under the License.
package dockershim

import (
"context"
"strings"
"time"

Expand All @@ -29,26 +28,21 @@ import (
"k8s.io/klog/v2"
)

func (ds *dockerService) getContainerStats(containerID string) (*runtimeapi.ContainerStats, error) {
info, err := ds.client.Info()
if err != nil {
return nil, err
}

hcsshimContainer, err := hcsshim.OpenContainer(containerID)
func (ds *dockerService) getContainerStats(c *runtimeapi.Container) (*runtimeapi.ContainerStats, error) {
hcsshimContainer, err := hcsshim.OpenContainer(c.Id)
if err != nil {
// As we moved from using Docker stats to hcsshim directly, we may query HCS with already exited container IDs.
// That will typically happen with init-containers in Exited state. Docker still knows about them but the HCS does not.
// As we don't want to block stats retrieval for other containers, we only log errors.
if !hcsshim.IsNotExist(err) && !hcsshim.IsAlreadyStopped(err) {
klog.V(4).InfoS("Error opening container (stats will be missing)", "containerID", containerID, "err", err)
klog.V(4).InfoS("Error opening container (stats will be missing)", "containerID", c.Id, "err", err)
}
return nil, nil
}
defer func() {
closeErr := hcsshimContainer.Close()
if closeErr != nil {
klog.ErrorS(closeErr, "Error closing container", "containerID", containerID)
klog.ErrorS(closeErr, "Error closing container", "containerID", c.Id)
}
}()

Expand All @@ -61,30 +55,19 @@ func (ds *dockerService) getContainerStats(containerID string) (*runtimeapi.Cont
// These hcs errors do not have helpers exposed in public package so need to query for the known codes
// https://github.com/microsoft/hcsshim/blob/master/internal/hcs/errors.go
// PR to expose helpers in hcsshim: https://github.com/microsoft/hcsshim/pull/933
klog.V(4).InfoS("Container is not in a state that stats can be accessed. This occurs when the container is created but not started.", "containerID", containerID, "err", err)
klog.V(4).InfoS("Container is not in a state that stats can be accessed. This occurs when the container is created but not started.", "containerID", c.Id, "err", err)
return nil, nil
}
return nil, err
}

containerJSON, err := ds.client.InspectContainerWithSize(containerID)
if err != nil {
return nil, err
}

statusResp, err := ds.ContainerStatus(context.Background(), &runtimeapi.ContainerStatusRequest{ContainerId: containerID})
if err != nil {
return nil, err
}
status := statusResp.GetStatus()

timestamp := time.Now().UnixNano()
containerStats := &runtimeapi.ContainerStats{
Attributes: &runtimeapi.ContainerAttributes{
Id: containerID,
Metadata: status.Metadata,
Labels: status.Labels,
Annotations: status.Annotations,
Id: c.Id,
Metadata: c.Metadata,
Labels: c.Labels,
Annotations: c.Annotations,
},
Cpu: &runtimeapi.CpuUsage{
Timestamp: timestamp,
Expand All @@ -97,8 +80,11 @@ func (ds *dockerService) getContainerStats(containerID string) (*runtimeapi.Cont
},
WritableLayer: &runtimeapi.FilesystemUsage{
Timestamp: timestamp,
FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: info.DockerRootDir},
UsedBytes: &runtimeapi.UInt64Value{Value: uint64(*containerJSON.SizeRw)},
FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: ds.dockerRootDir},
// used bytes from image are not implemented on Windows
// don't query for it since it is expensive to call docker over named pipe
// https://github.com/moby/moby/blob/1ba54a5fd0ba293db3bea46cd67604b593f2048b/daemon/images/image_windows.go#L11-L14
UsedBytes: &runtimeapi.UInt64Value{Value: 0},
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you have a sense of how much of the perf improvements you see on Windows are due to info from the docker api vs how much is due to not querying for this data on Windows?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not this specifically but I added a comment here with some details #104287 (comment)

},
}
return containerStats, nil
Expand Down
4 changes: 3 additions & 1 deletion pkg/kubelet/dockershim/libdocker/fake_client.go
Expand Up @@ -35,7 +35,7 @@ import (
dockercontainer "github.com/docker/docker/api/types/container"
dockerimagetypes "github.com/docker/docker/api/types/image"

"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/utils/clock"
)

Expand Down Expand Up @@ -226,6 +226,7 @@ func convertFakeContainer(f *FakeContainer) *dockertypes.ContainerJSON {
if f.HostConfig == nil {
f.HostConfig = &dockercontainer.HostConfig{}
}
fakeRWSize := int64(40)
return &dockertypes.ContainerJSON{
ContainerJSONBase: &dockertypes.ContainerJSONBase{
ID: f.ID,
Expand All @@ -240,6 +241,7 @@ func convertFakeContainer(f *FakeContainer) *dockertypes.ContainerJSON {
},
Created: dockerTimestampToString(f.CreatedAt),
HostConfig: f.HostConfig,
SizeRw: &fakeRWSize,
},
Config: f.Config,
NetworkSettings: &dockertypes.NetworkSettings{},
Expand Down