Skip to content

Commit

Permalink
Merge pull request #114504 from vrutkovs/tracing-kubelet-toplevel
Browse files Browse the repository at this point in the history
kubelet: create top-level traces for pod sync and GC
  • Loading branch information
k8s-ci-robot committed Mar 14, 2023
2 parents ee18f60 + 556d774 commit c8f001d
Show file tree
Hide file tree
Showing 9 changed files with 73 additions and 15 deletions.
1 change: 1 addition & 0 deletions hack/verify-flags/excluded-flags.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@ file_content_in_loop
break_on_expected_content
Premium_LRS
VCP_STRESS_ITERATIONS
update_type
13 changes: 12 additions & 1 deletion pkg/kubelet/images/image_gc_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"sync"
"time"

"go.opentelemetry.io/otel/trace"
v1 "k8s.io/api/core/v1"
"k8s.io/klog/v2"

Expand All @@ -38,6 +39,9 @@ import (
"k8s.io/kubernetes/pkg/kubelet/util/sliceutils"
)

// instrumentationScope is OpenTelemetry instrumentation scope name
const instrumentationScope = "k8s.io/kubernetes/pkg/kubelet/images"

// StatsProvider is an interface for fetching stats used during image garbage
// collection.
type StatsProvider interface {
Expand Down Expand Up @@ -104,6 +108,9 @@ type realImageGCManager struct {

// sandbox image exempted from GC
sandboxImage string

// tracer for recording spans
tracer trace.Tracer
}

// imageCache caches latest result of ListImages.
Expand Down Expand Up @@ -153,7 +160,7 @@ type imageRecord struct {
}

// NewImageGCManager instantiates a new ImageGCManager object.
func NewImageGCManager(runtime container.Runtime, statsProvider StatsProvider, recorder record.EventRecorder, nodeRef *v1.ObjectReference, policy ImageGCPolicy, sandboxImage string) (ImageGCManager, error) {
func NewImageGCManager(runtime container.Runtime, statsProvider StatsProvider, recorder record.EventRecorder, nodeRef *v1.ObjectReference, policy ImageGCPolicy, sandboxImage string, tracerProvider trace.TracerProvider) (ImageGCManager, error) {
// Validate policy.
if policy.HighThresholdPercent < 0 || policy.HighThresholdPercent > 100 {
return nil, fmt.Errorf("invalid HighThresholdPercent %d, must be in range [0-100]", policy.HighThresholdPercent)
Expand All @@ -164,6 +171,7 @@ func NewImageGCManager(runtime container.Runtime, statsProvider StatsProvider, r
if policy.LowThresholdPercent > policy.HighThresholdPercent {
return nil, fmt.Errorf("LowThresholdPercent %d can not be higher than HighThresholdPercent %d", policy.LowThresholdPercent, policy.HighThresholdPercent)
}
tracer := tracerProvider.Tracer(instrumentationScope)
im := &realImageGCManager{
runtime: runtime,
policy: policy,
Expand All @@ -173,6 +181,7 @@ func NewImageGCManager(runtime container.Runtime, statsProvider StatsProvider, r
nodeRef: nodeRef,
initialized: false,
sandboxImage: sandboxImage,
tracer: tracer,
}

return im, nil
Expand Down Expand Up @@ -279,6 +288,8 @@ func (im *realImageGCManager) detectImages(ctx context.Context, detectTime time.
}

func (im *realImageGCManager) GarbageCollect(ctx context.Context) error {
ctx, otelSpan := im.tracer.Start(ctx, "Images/GarbageCollect")
defer otelSpan.End()
// Get disk usage on disk holding images.
fsStats, err := im.statsProvider.ImageFsStats(ctx)
if err != nil {
Expand Down
12 changes: 7 additions & 5 deletions pkg/kubelet/images/image_gc_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

oteltrace "go.opentelemetry.io/otel/trace"
"k8s.io/client-go/tools/record"
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
"k8s.io/kubernetes/pkg/kubelet/container"
Expand All @@ -48,6 +49,7 @@ func newRealImageGCManager(policy ImageGCPolicy, mockStatsProvider stats.Provide
statsProvider: mockStatsProvider,
recorder: &record.FakeRecorder{},
sandboxImage: sandboxImage,
tracer: oteltrace.NewNoopTracerProvider().Tracer(""),
}, fakeRuntime
}

Expand Down Expand Up @@ -543,7 +545,7 @@ func TestGarbageCollectBelowLowThreshold(t *testing.T) {
manager, _ := newRealImageGCManager(policy, mockStatsProvider)

// Expect 40% usage.
mockStatsProvider.EXPECT().ImageFsStats(ctx).Return(&statsapi.FsStats{
mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(&statsapi.FsStats{
AvailableBytes: uint64Ptr(600),
CapacityBytes: uint64Ptr(1000),
}, nil)
Expand All @@ -562,7 +564,7 @@ func TestGarbageCollectCadvisorFailure(t *testing.T) {
mockStatsProvider := statstest.NewMockProvider(mockCtrl)
manager, _ := newRealImageGCManager(policy, mockStatsProvider)

mockStatsProvider.EXPECT().ImageFsStats(ctx).Return(&statsapi.FsStats{}, fmt.Errorf("error"))
mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(&statsapi.FsStats{}, fmt.Errorf("error"))
assert.NotNil(t, manager.GarbageCollect(ctx))
}

Expand All @@ -579,7 +581,7 @@ func TestGarbageCollectBelowSuccess(t *testing.T) {
manager, fakeRuntime := newRealImageGCManager(policy, mockStatsProvider)

// Expect 95% usage and most of it gets freed.
mockStatsProvider.EXPECT().ImageFsStats(ctx).Return(&statsapi.FsStats{
mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(&statsapi.FsStats{
AvailableBytes: uint64Ptr(50),
CapacityBytes: uint64Ptr(1000),
}, nil)
Expand All @@ -602,7 +604,7 @@ func TestGarbageCollectNotEnoughFreed(t *testing.T) {
manager, fakeRuntime := newRealImageGCManager(policy, mockStatsProvider)

// Expect 95% usage and little of it gets freed.
mockStatsProvider.EXPECT().ImageFsStats(ctx).Return(&statsapi.FsStats{
mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(&statsapi.FsStats{
AvailableBytes: uint64Ptr(50),
CapacityBytes: uint64Ptr(1000),
}, nil)
Expand Down Expand Up @@ -717,7 +719,7 @@ func TestValidateImageGCPolicy(t *testing.T) {
}

for _, tc := range testCases {
if _, err := NewImageGCManager(nil, nil, nil, nil, tc.imageGCPolicy, ""); err != nil {
if _, err := NewImageGCManager(nil, nil, nil, nil, tc.imageGCPolicy, "", oteltrace.NewNoopTracerProvider()); err != nil {
if err.Error() != tc.expectErr {
t.Errorf("[%s:]Expected err:%v, but got:%v", tc.name, tc.expectErr, err.Error())
}
Expand Down
37 changes: 34 additions & 3 deletions pkg/kubelet/kubelet.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (

cadvisorapi "github.com/google/cadvisor/info/v1"
libcontaineruserns "github.com/opencontainers/runc/libcontainer/userns"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"

"k8s.io/mount-utils"
Expand Down Expand Up @@ -198,6 +199,9 @@ const (

// nodeLeaseRenewIntervalFraction is the fraction of lease duration to renew the lease
nodeLeaseRenewIntervalFraction = 0.25

// instrumentationScope is the name of OpenTelemetry instrumentation scope
instrumentationScope = "k8s.io/kubernetes/pkg/kubelet"
)

var (
Expand Down Expand Up @@ -511,6 +515,8 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
insecureContainerLifecycleHTTPClient.CheckRedirect = httpprobe.RedirectChecker(false)
}

tracer := kubeDeps.TracerProvider.Tracer(instrumentationScope)

klet := &Kubelet{
hostname: hostname,
hostnameOverridden: hostnameOverridden,
Expand Down Expand Up @@ -561,6 +567,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
experimentalHostUserNamespaceDefaulting: utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalHostUserNamespaceDefaultingGate),
keepTerminatedPodVolumes: keepTerminatedPodVolumes,
nodeStatusMaxImages: nodeStatusMaxImages,
tracer: tracer,
}

if klet.cloud != nil {
Expand Down Expand Up @@ -677,6 +684,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
kubeDeps.ContainerManager.GetNodeAllocatableAbsolute,
*kubeCfg.MemoryThrottlingFactor,
kubeDeps.PodStartupLatencyTracker,
kubeDeps.TracerProvider,
)
if err != nil {
return nil, err
Expand Down Expand Up @@ -760,7 +768,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
klet.containerDeletor = newPodContainerDeletor(klet.containerRuntime, integer.IntMax(containerGCPolicy.MaxPerPodContainer, minDeadContainerInPod))

// setup imageManager
imageManager, err := images.NewImageGCManager(klet.containerRuntime, klet.StatsProvider, kubeDeps.Recorder, nodeRef, imageGCPolicy, crOptions.PodSandboxImage)
imageManager, err := images.NewImageGCManager(klet.containerRuntime, klet.StatsProvider, kubeDeps.Recorder, nodeRef, imageGCPolicy, crOptions.PodSandboxImage, kubeDeps.TracerProvider)
if err != nil {
return nil, fmt.Errorf("failed to initialize image manager: %v", err)
}
Expand Down Expand Up @@ -1252,6 +1260,9 @@ type Kubelet struct {

// Mutex to serialize new pod admission and existing pod resizing
podResizeMutex sync.Mutex

// OpenTelemetry Tracer
tracer trace.Tracer
}

// ListPodStats is delegated to StatsProvider, which implements stats.Provider interface
Expand Down Expand Up @@ -1612,10 +1623,17 @@ func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) {
func (kl *Kubelet) SyncPod(_ context.Context, updateType kubetypes.SyncPodType, pod, mirrorPod *v1.Pod, podStatus *kubecontainer.PodStatus) (isTerminal bool, err error) {
// TODO(#113606): connect this with the incoming context parameter, which comes from the pod worker.
// Currently, using that context causes test failures.
ctx := context.TODO()
ctx, otelSpan := kl.tracer.Start(context.TODO(), "syncPod", trace.WithAttributes(
attribute.String("k8s.pod.uid", string(pod.UID)),
attribute.String("k8s.pod", klog.KObj(pod).String()),
attribute.String("k8s.pod.name", pod.Name),
attribute.String("k8s.pod.update_type", updateType.String()),
attribute.String("k8s.namespace.name", pod.Namespace),
))
klog.V(4).InfoS("SyncPod enter", "pod", klog.KObj(pod), "podUID", pod.UID)
defer func() {
klog.V(4).InfoS("SyncPod exit", "pod", klog.KObj(pod), "podUID", pod.UID, "isTerminal", isTerminal)
otelSpan.End()
}()

// Latency measurements for the main workflow are relative to the
Expand Down Expand Up @@ -1882,7 +1900,13 @@ func (kl *Kubelet) SyncPod(_ context.Context, updateType kubetypes.SyncPodType,
func (kl *Kubelet) SyncTerminatingPod(_ context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, gracePeriod *int64, podStatusFn func(*v1.PodStatus)) error {
// TODO(#113606): connect this with the incoming context parameter, which comes from the pod worker.
// Currently, using that context causes test failures.
ctx := context.Background()
ctx, otelSpan := kl.tracer.Start(context.Background(), "syncTerminatingPod", trace.WithAttributes(
attribute.String("k8s.pod.uid", string(pod.UID)),
attribute.String("k8s.pod", klog.KObj(pod).String()),
attribute.String("k8s.pod.name", pod.Name),
attribute.String("k8s.namespace.name", pod.Namespace),
))
defer otelSpan.End()
klog.V(4).InfoS("SyncTerminatingPod enter", "pod", klog.KObj(pod), "podUID", pod.UID)
defer klog.V(4).InfoS("SyncTerminatingPod exit", "pod", klog.KObj(pod), "podUID", pod.UID)

Expand Down Expand Up @@ -2004,6 +2028,13 @@ func (kl *Kubelet) SyncTerminatingRuntimePod(_ context.Context, runningPod *kube
// This typically occurs when a pod is force deleted from configuration (local disk or API) and the
// kubelet restarts in the middle of the action.
func (kl *Kubelet) SyncTerminatedPod(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus) error {
_, otelSpan := kl.tracer.Start(context.Background(), "syncTerminatedPod", trace.WithAttributes(
attribute.String("k8s.pod.uid", string(pod.UID)),
attribute.String("k8s.pod", klog.KObj(pod).String()),
attribute.String("k8s.pod.name", pod.Name),
attribute.String("k8s.namespace.name", pod.Namespace),
))
defer otelSpan.End()
klog.V(4).InfoS("SyncTerminatedPod enter", "pod", klog.KObj(pod), "podUID", pod.UID)
defer klog.V(4).InfoS("SyncTerminatedPod exit", "pod", klog.KObj(pod), "podUID", pod.UID)

Expand Down
3 changes: 2 additions & 1 deletion pkg/kubelet/kubelet_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ func newTestKubeletWithImageList(
kubelet.cadvisor = &cadvisortest.Fake{}
machineInfo, _ := kubelet.cadvisor.MachineInfo()
kubelet.setCachedMachineInfo(machineInfo)
kubelet.tracer = oteltrace.NewNoopTracerProvider().Tracer("")

fakeMirrorClient := podtest.NewFakeMirrorClient()
secretManager := secret.NewSimpleSecretManager(kubelet.kubeClient)
Expand Down Expand Up @@ -304,7 +305,7 @@ func newTestKubeletWithImageList(
HighThresholdPercent: 90,
LowThresholdPercent: 80,
}
imageGCManager, err := images.NewImageGCManager(fakeRuntime, kubelet.StatsProvider, fakeRecorder, fakeNodeRef, fakeImageGCPolicy, "")
imageGCManager, err := images.NewImageGCManager(fakeRuntime, kubelet.StatsProvider, fakeRecorder, fakeNodeRef, fakeImageGCPolicy, "", oteltrace.NewNoopTracerProvider())
assert.NoError(t, err)
kubelet.imageManager = &fakeImageGCManager{
fakeImageService: fakeRuntime,
Expand Down
5 changes: 3 additions & 2 deletions pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"time"

cadvisorapi "github.com/google/cadvisor/info/v1"
"go.opentelemetry.io/otel/trace"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -90,7 +91,7 @@ func (f *fakePodPullingTimeRecorder) RecordImageStartedPulling(podUID types.UID)

func (f *fakePodPullingTimeRecorder) RecordImageFinishedPulling(podUID types.UID) {}

func newFakeKubeRuntimeManager(runtimeService internalapi.RuntimeService, imageService internalapi.ImageManagerService, machineInfo *cadvisorapi.MachineInfo, osInterface kubecontainer.OSInterface, runtimeHelper kubecontainer.RuntimeHelper, keyring credentialprovider.DockerKeyring) (*kubeGenericRuntimeManager, error) {
func newFakeKubeRuntimeManager(runtimeService internalapi.RuntimeService, imageService internalapi.ImageManagerService, machineInfo *cadvisorapi.MachineInfo, osInterface kubecontainer.OSInterface, runtimeHelper kubecontainer.RuntimeHelper, keyring credentialprovider.DockerKeyring, tracer trace.Tracer) (*kubeGenericRuntimeManager, error) {
ctx := context.Background()
recorder := &record.FakeRecorder{}
logManager, err := logs.NewContainerLogManager(runtimeService, osInterface, "1", 2)
Expand Down Expand Up @@ -122,7 +123,7 @@ func newFakeKubeRuntimeManager(runtimeService internalapi.RuntimeService, imageS
}

podStateProvider := newFakePodStateProvider()
kubeRuntimeManager.containerGC = newContainerGC(runtimeService, podStateProvider, kubeRuntimeManager)
kubeRuntimeManager.containerGC = newContainerGC(runtimeService, podStateProvider, kubeRuntimeManager, tracer)
kubeRuntimeManager.podStateProvider = podStateProvider
kubeRuntimeManager.runtimeName = typedVersion.RuntimeName
kubeRuntimeManager.imagePuller = images.NewImageManager(
Expand Down
7 changes: 6 additions & 1 deletion pkg/kubelet/kuberuntime/kuberuntime_gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"sort"
"time"

"go.opentelemetry.io/otel/trace"
"k8s.io/apimachinery/pkg/types"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
Expand All @@ -38,14 +39,16 @@ type containerGC struct {
client internalapi.RuntimeService
manager *kubeGenericRuntimeManager
podStateProvider podStateProvider
tracer trace.Tracer
}

// NewContainerGC creates a new containerGC.
func newContainerGC(client internalapi.RuntimeService, podStateProvider podStateProvider, manager *kubeGenericRuntimeManager) *containerGC {
func newContainerGC(client internalapi.RuntimeService, podStateProvider podStateProvider, manager *kubeGenericRuntimeManager, tracer trace.Tracer) *containerGC {
return &containerGC{
client: client,
manager: manager,
podStateProvider: podStateProvider,
tracer: tracer,
}
}

Expand Down Expand Up @@ -407,6 +410,8 @@ func (cgc *containerGC) evictPodLogsDirectories(ctx context.Context, allSourcesR
// * gets evictable sandboxes which are not ready and contains no containers.
// * removes evictable sandboxes.
func (cgc *containerGC) GarbageCollect(ctx context.Context, gcPolicy kubecontainer.GCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
ctx, otelSpan := cgc.tracer.Start(ctx, "Containers/GarbageCollect")
defer otelSpan.End()
errors := []error{}
// Remove evictable containers
if err := cgc.evictContainers(ctx, gcPolicy, allSourcesReady, evictNonDeletedPods); err != nil {
Expand Down
7 changes: 6 additions & 1 deletion pkg/kubelet/kuberuntime/kuberuntime_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"time"

cadvisorapi "github.com/google/cadvisor/info/v1"
"go.opentelemetry.io/otel/trace"
crierror "k8s.io/cri-api/pkg/errors"
"k8s.io/klog/v2"

Expand Down Expand Up @@ -77,6 +78,8 @@ const (
versionCacheTTL = 60 * time.Second
// How frequently to report identical errors
identicalErrorDelay = 1 * time.Minute
// OpenTelemetry instrumentation scope name
instrumentationScope = "k8s.io/kubernetes/pkg/kubelet/kuberuntime"
)

var (
Expand Down Expand Up @@ -205,10 +208,12 @@ func NewKubeGenericRuntimeManager(
getNodeAllocatable func() v1.ResourceList,
memoryThrottlingFactor float64,
podPullingTimeRecorder images.ImagePodPullingTimeRecorder,
tracerProvider trace.TracerProvider,
) (KubeGenericRuntime, error) {
ctx := context.Background()
runtimeService = newInstrumentedRuntimeService(runtimeService)
imageService = newInstrumentedImageManagerService(imageService)
tracer := tracerProvider.Tracer(instrumentationScope)
kubeRuntimeManager := &kubeGenericRuntimeManager{
recorder: recorder,
cpuCFSQuota: cpuCFSQuota,
Expand Down Expand Up @@ -281,7 +286,7 @@ func NewKubeGenericRuntimeManager(
imagePullBurst,
podPullingTimeRecorder)
kubeRuntimeManager.runner = lifecycle.NewHandlerRunner(insecureContainerLifecycleHTTPClient, kubeRuntimeManager, kubeRuntimeManager, recorder)
kubeRuntimeManager.containerGC = newContainerGC(runtimeService, podStateProvider, kubeRuntimeManager)
kubeRuntimeManager.containerGC = newContainerGC(runtimeService, podStateProvider, kubeRuntimeManager, tracer)
kubeRuntimeManager.podStateProvider = podStateProvider

kubeRuntimeManager.versionCache = cache.NewObjectCache(
Expand Down
3 changes: 2 additions & 1 deletion pkg/kubelet/kuberuntime/kuberuntime_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
cadvisorapi "github.com/google/cadvisor/info/v1"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
oteltrace "go.opentelemetry.io/otel/trace"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
Expand Down Expand Up @@ -69,7 +70,7 @@ func customTestRuntimeManager(keyring *credentialprovider.BasicDockerKeyring) (*
MemoryCapacity: uint64(memoryCapacityQuantity.Value()),
}
osInterface := &containertest.FakeOS{}
manager, err := newFakeKubeRuntimeManager(fakeRuntimeService, fakeImageService, machineInfo, osInterface, &containertest.FakeRuntimeHelper{}, keyring)
manager, err := newFakeKubeRuntimeManager(fakeRuntimeService, fakeImageService, machineInfo, osInterface, &containertest.FakeRuntimeHelper{}, keyring, oteltrace.NewNoopTracerProvider().Tracer(""))
return fakeRuntimeService, fakeImageService, manager, err
}

Expand Down

0 comments on commit c8f001d

Please sign in to comment.