Skip to content

Commit

Permalink
Merge pull request #124220 from HirazawaUi/fix-pod-restarted
Browse files Browse the repository at this point in the history
[kubelet]: fixed container restart due to pod spec field changes
  • Loading branch information
k8s-ci-robot committed May 22, 2024
2 parents 5cdab88 + 3ec13c5 commit dad8fe7
Show file tree
Hide file tree
Showing 10 changed files with 50 additions and 88 deletions.
11 changes: 3 additions & 8 deletions pkg/kubelet/container/container_hash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,7 @@ var (
}
`

sampleV115HashValue = uint64(0x311670a)
sampleV116HashValue = sampleV115HashValue
sampleV131HashValue = uint64(0x8e45cbd0)
)

func TestConsistentHashContainer(t *testing.T) {
Expand All @@ -79,11 +78,7 @@ func TestConsistentHashContainer(t *testing.T) {
}

currentHash := HashContainer(container)
if currentHash != sampleV116HashValue {
t.Errorf("mismatched hash value with v1.16")
}

if currentHash != sampleV115HashValue {
t.Errorf("mismatched hash value with v1.15")
if currentHash != sampleV131HashValue {
t.Errorf("mismatched hash value with v1.31")
}
}
45 changes: 18 additions & 27 deletions pkg/kubelet/container/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,28 +110,20 @@ func ShouldContainerBeRestarted(container *v1.Container, pod *v1.Pod, podStatus
// HashContainer returns a hash of the container's restart-relevant fields
// (see pickFieldsToHash), used to detect spec changes that require the
// container to be restarted.
// Note: remember to update hashValues in container_hash_test.go as well.
func HashContainer(container *v1.Container) uint64 {
	hash := fnv.New32a()
	// Hash only the fields that should trigger a restart when changed,
	// rather than the whole container spec.
	containerJSON, _ := json.Marshal(pickFieldsToHash(container))
	hashutil.DeepHashObject(hash, containerJSON)
	return uint64(hash.Sum32())
}

// HashContainerWithoutResources returns the hash of the container with Resources field zero'd out.
func HashContainerWithoutResources(container *v1.Container) uint64 {
// InPlacePodVerticalScaling enables mutable Resources field.
// Changes to this field may not require container restart depending on policy.
// Compute hash over fields besides the Resources field
// NOTE: This is needed during alpha and beta so that containers using Resources but
// not subject to In-place resize are not unexpectedly restarted when
// InPlacePodVerticalScaling feature-gate is toggled.
//TODO(vinaykul,InPlacePodVerticalScaling): Remove this in GA+1 and make HashContainerWithoutResources to become Hash.
hashWithoutResources := fnv.New32a()
containerCopy := container.DeepCopy()
containerCopy.Resources = v1.ResourceRequirements{}
containerJSON, _ := json.Marshal(containerCopy)
hashutil.DeepHashObject(hashWithoutResources, containerJSON)
return uint64(hashWithoutResources.Sum32())
// pickFieldsToHash selects the container spec fields that affect whether a
// running container must be restarted; currently only `name` and `image`.
// Note: this list must be updated if ever kubelet wants to allow mutations
// to other fields.
func pickFieldsToHash(container *v1.Container) map[string]string {
	return map[string]string{
		"name":  container.Name,
		"image": container.Image,
	}
}

// envVarsToMap constructs a map of environment name to value from a slice
Expand Down Expand Up @@ -269,15 +261,14 @@ func ConvertPodStatusToRunningPod(runtimeName string, podStatus *PodStatus) Pod
continue
}
container := &Container{
ID: containerStatus.ID,
Name: containerStatus.Name,
Image: containerStatus.Image,
ImageID: containerStatus.ImageID,
ImageRef: containerStatus.ImageRef,
ImageRuntimeHandler: containerStatus.ImageRuntimeHandler,
Hash: containerStatus.Hash,
HashWithoutResources: containerStatus.HashWithoutResources,
State: containerStatus.State,
ID: containerStatus.ID,
Name: containerStatus.Name,
Image: containerStatus.Image,
ImageID: containerStatus.ImageID,
ImageRef: containerStatus.ImageRef,
ImageRuntimeHandler: containerStatus.ImageRuntimeHandler,
Hash: containerStatus.Hash,
State: containerStatus.State,
}
runningPod.Containers = append(runningPod.Containers, container)
}
Expand Down
12 changes: 6 additions & 6 deletions pkg/kubelet/container/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -657,7 +657,7 @@ func TestHashContainer(t *testing.T) {
"echo abc",
},
containerPort: int32(8001),
expectedHash: uint64(0x3c42280f),
expectedHash: uint64(0x8e45cbd0),
},
}

Expand Down Expand Up @@ -938,7 +938,7 @@ func TestHashContainerWithoutResources(t *testing.T) {
},
ResizePolicy: []v1.ContainerResizePolicy{cpuPolicyRestartRequired, memPolicyRestartNotRequired},
},
0x5f62cb4c,
0x11a6d6d6,
},
{
"Burstable pod with memory policy restart required",
Expand All @@ -951,7 +951,7 @@ func TestHashContainerWithoutResources(t *testing.T) {
},
ResizePolicy: []v1.ContainerResizePolicy{cpuPolicyRestartNotRequired, memPolicyRestartRequired},
},
0xcdab9e00,
0x11a6d6d6,
},
{
"Guaranteed pod with CPU policy restart required",
Expand All @@ -964,7 +964,7 @@ func TestHashContainerWithoutResources(t *testing.T) {
},
ResizePolicy: []v1.ContainerResizePolicy{cpuPolicyRestartRequired, memPolicyRestartNotRequired},
},
0x5f62cb4c,
0x11a6d6d6,
},
{
"Guaranteed pod with memory policy restart required",
Expand All @@ -977,13 +977,13 @@ func TestHashContainerWithoutResources(t *testing.T) {
},
ResizePolicy: []v1.ContainerResizePolicy{cpuPolicyRestartNotRequired, memPolicyRestartRequired},
},
0xcdab9e00,
0x11a6d6d6,
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
containerCopy := tc.container.DeepCopy()
hash := HashContainerWithoutResources(tc.container)
hash := HashContainer(tc.container)
assert.Equal(t, tc.expectedHash, hash, "[%s]", tc.name)
assert.Equal(t, containerCopy, tc.container, "[%s]", tc.name)
})
Expand Down
7 changes: 0 additions & 7 deletions pkg/kubelet/container/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,11 +295,6 @@ type Container struct {
// Hash of the container, used for comparison. Optional for containers
// not managed by kubelet.
Hash uint64
// Hash of the container over fields with Resources field zero'd out.
// NOTE: This is needed during alpha and beta so that containers using Resources are
// not unexpectedly restarted when InPlacePodVerticalScaling feature-gate is toggled.
//TODO(vinaykul,InPlacePodVerticalScaling): Remove this in GA+1 and make HashWithoutResources to become Hash.
HashWithoutResources uint64
// State is the state of the container.
State State
}
Expand Down Expand Up @@ -365,8 +360,6 @@ type Status struct {
ImageRuntimeHandler string
// Hash of the container, used for comparison.
Hash uint64
// Hash of the container over fields with Resources field zero'd out.
HashWithoutResources uint64
// Number of times that the container has been restarted.
RestartCount int
// A string explains why container is in such a status.
Expand Down
17 changes: 8 additions & 9 deletions pkg/kubelet/kuberuntime/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,15 +102,14 @@ func (m *kubeGenericRuntimeManager) toKubeContainer(c *runtimeapi.Container) (*k

annotatedInfo := getContainerInfoFromAnnotations(c.Annotations)
return &kubecontainer.Container{
ID: kubecontainer.ContainerID{Type: m.runtimeName, ID: c.Id},
Name: c.GetMetadata().GetName(),
ImageID: imageID,
ImageRef: c.ImageRef,
ImageRuntimeHandler: c.Image.RuntimeHandler,
Image: c.Image.Image,
Hash: annotatedInfo.Hash,
HashWithoutResources: annotatedInfo.HashWithoutResources,
State: toKubeContainerState(c.State),
ID: kubecontainer.ContainerID{Type: m.runtimeName, ID: c.Id},
Name: c.GetMetadata().GetName(),
ImageID: imageID,
ImageRef: c.ImageRef,
ImageRuntimeHandler: c.Image.RuntimeHandler,
Image: c.Image.Image,
Hash: annotatedInfo.Hash,
State: toKubeContainerState(c.State),
}, nil
}

Expand Down
21 changes: 10 additions & 11 deletions pkg/kubelet/kuberuntime/kuberuntime_container.go
Original file line number Diff line number Diff line change
Expand Up @@ -622,17 +622,16 @@ func toKubeContainerStatus(status *runtimeapi.ContainerStatus, runtimeName strin
Type: runtimeName,
ID: status.Id,
},
Name: labeledInfo.ContainerName,
Image: status.Image.Image,
ImageID: imageID,
ImageRef: status.ImageRef,
ImageRuntimeHandler: status.Image.RuntimeHandler,
Hash: annotatedInfo.Hash,
HashWithoutResources: annotatedInfo.HashWithoutResources,
RestartCount: annotatedInfo.RestartCount,
State: toKubeContainerState(status.State),
CreatedAt: time.Unix(0, status.CreatedAt),
Resources: cStatusResources,
Name: labeledInfo.ContainerName,
Image: status.Image.Image,
ImageID: imageID,
ImageRef: status.ImageRef,
ImageRuntimeHandler: status.Image.RuntimeHandler,
Hash: annotatedInfo.Hash,
RestartCount: annotatedInfo.RestartCount,
State: toKubeContainerState(status.State),
CreatedAt: time.Unix(0, status.CreatedAt),
Resources: cStatusResources,
}

if status.State != runtimeapi.ContainerState_CONTAINER_CREATED {
Expand Down
9 changes: 5 additions & 4 deletions pkg/kubelet/kuberuntime/kuberuntime_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,10 @@ func (p podActions) String() string {
p.KillPod, p.CreateSandbox, p.UpdatePodResources, p.Attempt, p.InitContainersToStart, p.ContainersToStart, p.EphemeralContainersToStart, p.ContainersToUpdate, p.ContainersToKill)
}

// containerChanged determines whether the container has changed based on the fields that affect how the container runs.
// Currently, only the `image` and `name` fields are considered.
// We don't need to consider the pod UID here, because we look up the containerStatus by pod UID;
// if the pod UID changes, we will not be able to find the containerStatus to compare against.
func containerChanged(container *v1.Container, containerStatus *kubecontainer.Status) (uint64, uint64, bool) {
expectedHash := kubecontainer.HashContainer(container)
return expectedHash, containerStatus.Hash, containerStatus.Hash != expectedHash
Expand Down Expand Up @@ -981,10 +985,7 @@ func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod *
var message string
var reason containerKillReason
restart := shouldRestartOnFailure(pod)
// Do not restart if only the Resources field has changed with InPlacePodVerticalScaling enabled
if _, _, changed := containerChanged(&container, containerStatus); changed &&
(!isInPlacePodVerticalScalingAllowed(pod) ||
kubecontainer.HashContainerWithoutResources(&container) != containerStatus.HashWithoutResources) {
if _, _, changed := containerChanged(&container, containerStatus); changed {
message = fmt.Sprintf("Container %s definition changed", container.Name)
// Restart regardless of the restart policy because the container
// spec changed.
Expand Down
2 changes: 0 additions & 2 deletions pkg/kubelet/kuberuntime/kuberuntime_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2436,7 +2436,6 @@ func TestComputePodActionsForPodResize(t *testing.T) {
// compute hash
if kcs := kps.FindContainerStatusByName(pod.Spec.Containers[idx].Name); kcs != nil {
kcs.Hash = kubecontainer.HashContainer(&pod.Spec.Containers[idx])
kcs.HashWithoutResources = kubecontainer.HashContainerWithoutResources(&pod.Spec.Containers[idx])
}
}
makeAndSetFakePod(t, m, fakeRuntime, pod)
Expand All @@ -2452,7 +2451,6 @@ func TestComputePodActionsForPodResize(t *testing.T) {
for idx := range pod.Spec.Containers {
if kcs := kps.FindContainerStatusByName(pod.Spec.Containers[idx].Name); kcs != nil {
kcs.Hash = kubecontainer.HashContainer(&pod.Spec.Containers[idx])
kcs.HashWithoutResources = kubecontainer.HashContainerWithoutResources(&pod.Spec.Containers[idx])
}
}
if test.mutatePodFn != nil {
Expand Down
12 changes: 0 additions & 12 deletions pkg/kubelet/kuberuntime/labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,8 @@ import (

v1 "k8s.io/api/core/v1"
kubetypes "k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog/v2"
"k8s.io/kubelet/pkg/types"
"k8s.io/kubernetes/pkg/features"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)

Expand All @@ -35,7 +33,6 @@ const (
podTerminationGracePeriodLabel = "io.kubernetes.pod.terminationGracePeriod"

containerHashLabel = "io.kubernetes.container.hash"
containerHashWithoutResourcesLabel = "io.kubernetes.container.hashWithoutResources"
containerRestartCountLabel = "io.kubernetes.container.restartCount"
containerTerminationMessagePathLabel = "io.kubernetes.container.terminationMessagePath"
containerTerminationMessagePolicyLabel = "io.kubernetes.container.terminationMessagePolicy"
Expand Down Expand Up @@ -65,7 +62,6 @@ type labeledContainerInfo struct {

type annotatedContainerInfo struct {
Hash uint64
HashWithoutResources uint64
RestartCount int
PodDeletionGracePeriod *int64
PodTerminationGracePeriod *int64
Expand Down Expand Up @@ -117,9 +113,6 @@ func newContainerAnnotations(container *v1.Container, pod *v1.Pod, restartCount
}

annotations[containerHashLabel] = strconv.FormatUint(kubecontainer.HashContainer(container), 16)
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
annotations[containerHashWithoutResourcesLabel] = strconv.FormatUint(kubecontainer.HashContainerWithoutResources(container), 16)
}
annotations[containerRestartCountLabel] = strconv.Itoa(restartCount)
annotations[containerTerminationMessagePathLabel] = container.TerminationMessagePath
annotations[containerTerminationMessagePolicyLabel] = string(container.TerminationMessagePolicy)
Expand Down Expand Up @@ -200,11 +193,6 @@ func getContainerInfoFromAnnotations(annotations map[string]string) *annotatedCo
if containerInfo.Hash, err = getUint64ValueFromLabel(annotations, containerHashLabel); err != nil {
klog.ErrorS(err, "Unable to get label value from annotations", "label", containerHashLabel, "annotations", annotations)
}
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
if containerInfo.HashWithoutResources, err = getUint64ValueFromLabel(annotations, containerHashWithoutResourcesLabel); err != nil {
klog.ErrorS(err, "Unable to get label value from annotations", "label", containerHashWithoutResourcesLabel, "annotations", annotations)
}
}
if containerInfo.RestartCount, err = getIntValueFromLabel(annotations, containerRestartCountLabel); err != nil {
klog.ErrorS(err, "Unable to get label value from annotations", "label", containerRestartCountLabel, "annotations", annotations)
}
Expand Down
2 changes: 0 additions & 2 deletions pkg/kubelet/kuberuntime/labels_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,6 @@ func TestContainerAnnotations(t *testing.T) {
PodDeletionGracePeriod: pod.DeletionGracePeriodSeconds,
PodTerminationGracePeriod: pod.Spec.TerminationGracePeriodSeconds,
Hash: kubecontainer.HashContainer(container),
HashWithoutResources: kubecontainer.HashContainerWithoutResources(container),
RestartCount: restartCount,
TerminationMessagePath: container.TerminationMessagePath,
PreStopHandler: container.Lifecycle.PreStop,
Expand All @@ -182,7 +181,6 @@ func TestContainerAnnotations(t *testing.T) {
expected.PreStopHandler = nil
// Because container is changed, the Hash should be updated
expected.Hash = kubecontainer.HashContainer(container)
expected.HashWithoutResources = kubecontainer.HashContainerWithoutResources(container)
annotations = newContainerAnnotations(container, pod, restartCount, opts)
containerInfo = getContainerInfoFromAnnotations(annotations)
if !reflect.DeepEqual(containerInfo, expected) {
Expand Down

0 comments on commit dad8fe7

Please sign in to comment.