Skip to content

Commit

Permalink
Merge pull request kubernetes#116702 from vinaykul/restart-free-pod-v…
Browse files Browse the repository at this point in the history
…ertical-scaling-podmutation-fix

Fix pod object update that may cause data race
  • Loading branch information
k8s-ci-robot committed Mar 22, 2023
2 parents 9c6414c + f41702b commit c7cc788
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 22 deletions.
44 changes: 24 additions & 20 deletions pkg/kubelet/kubelet.go
Original file line number Diff line number Diff line change
Expand Up @@ -1889,7 +1889,7 @@ func (kl *Kubelet) SyncPod(_ context.Context, updateType kubetypes.SyncPodType,
// TODO(vinaykul,InPlacePodVerticalScaling): Investigate doing this in HandlePodUpdates + periodic SyncLoop scan
// See: https://github.com/kubernetes/kubernetes/pull/102884#discussion_r663160060
if kl.podWorkers.CouldHaveRunningContainers(pod.UID) && !kubetypes.IsStaticPod(pod) {
kl.handlePodResourcesResize(pod)
pod = kl.handlePodResourcesResize(pod)
}
}

Expand Down Expand Up @@ -2631,13 +2631,14 @@ func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, *v1.Pod, v1.PodResizeStatus)
klog.ErrorS(err, "getNodeAnyway function failed")
return false, nil, ""
}
podCopy := pod.DeepCopy()
cpuAvailable := node.Status.Allocatable.Cpu().MilliValue()
memAvailable := node.Status.Allocatable.Memory().Value()
cpuRequests := resource.GetResourceRequest(pod, v1.ResourceCPU)
memRequests := resource.GetResourceRequest(pod, v1.ResourceMemory)
cpuRequests := resource.GetResourceRequest(podCopy, v1.ResourceCPU)
memRequests := resource.GetResourceRequest(podCopy, v1.ResourceMemory)
if cpuRequests > cpuAvailable || memRequests > memAvailable {
klog.V(3).InfoS("Resize is not feasible as request exceeds allocatable node resources", "pod", pod.Name)
return false, nil, v1.PodResizeStatusInfeasible
klog.V(3).InfoS("Resize is not feasible as request exceeds allocatable node resources", "pod", podCopy.Name)
return false, podCopy, v1.PodResizeStatusInfeasible
}

// Treat the existing pod needing resize as a new pod with desired resources seeking admit.
Expand All @@ -2649,13 +2650,12 @@ func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, *v1.Pod, v1.PodResizeStatus)
}
}

if ok, failReason, failMessage := kl.canAdmitPod(otherActivePods, pod); !ok {
if ok, failReason, failMessage := kl.canAdmitPod(otherActivePods, podCopy); !ok {
// Log reason and return. Let the next sync iteration retry the resize
klog.V(3).InfoS("Resize cannot be accommodated", "pod", pod.Name, "reason", failReason, "message", failMessage)
return false, nil, v1.PodResizeStatusDeferred
klog.V(3).InfoS("Resize cannot be accommodated", "pod", podCopy.Name, "reason", failReason, "message", failMessage)
return false, podCopy, v1.PodResizeStatusDeferred
}

podCopy := pod.DeepCopy()
for _, container := range podCopy.Spec.Containers {
idx, found := podutil.GetIndexOfContainerStatus(podCopy.Status.ContainerStatuses, container.Name)
if found {
Expand All @@ -2667,9 +2667,9 @@ func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, *v1.Pod, v1.PodResizeStatus)
return true, podCopy, v1.PodResizeStatusInProgress
}

func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod) {
func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod) *v1.Pod {
if pod.Status.Phase != v1.PodRunning {
return
return pod
}
podResized := false
for _, container := range pod.Spec.Containers {
Expand All @@ -2691,31 +2691,35 @@ func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod) {
}
}
if !podResized {
return
return pod
}

kl.podResizeMutex.Lock()
defer kl.podResizeMutex.Unlock()
fit, updatedPod, resizeStatus := kl.canResizePod(pod)
if updatedPod == nil {
return pod
}
if fit {
// Update pod resource allocation checkpoint
if err := kl.statusManager.SetPodAllocation(updatedPod); err != nil {
//TODO(vinaykul,InPlacePodVerticalScaling): Can we recover from this in some way? Investigate
klog.ErrorS(err, "SetPodAllocation failed", "pod", klog.KObj(pod))
klog.ErrorS(err, "SetPodAllocation failed", "pod", klog.KObj(updatedPod))
return pod
}
*pod = *updatedPod
}
if resizeStatus != "" {
// Save resize decision to checkpoint
if err := kl.statusManager.SetPodResizeStatus(pod.UID, resizeStatus); err != nil {
if err := kl.statusManager.SetPodResizeStatus(updatedPod.UID, resizeStatus); err != nil {
//TODO(vinaykul,InPlacePodVerticalScaling): Can we recover from this in some way? Investigate
klog.ErrorS(err, "SetPodResizeStatus failed", "pod", klog.KObj(pod))
klog.ErrorS(err, "SetPodResizeStatus failed", "pod", klog.KObj(updatedPod))
return pod
}
pod.Status.Resize = resizeStatus
updatedPod.Status.Resize = resizeStatus
}
kl.podManager.UpdatePod(pod)
kl.statusManager.SetPodStatus(pod, pod.Status)
return
kl.podManager.UpdatePod(updatedPod)
kl.statusManager.SetPodStatus(updatedPod, updatedPod.Status)
return updatedPod
}

// LatestLoopEntryTime returns the last time in the sync loop monitor.
Expand Down
6 changes: 4 additions & 2 deletions pkg/kubelet/kubelet_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2585,8 +2585,10 @@ func TestHandlePodResourcesResize(t *testing.T) {
tt.pod.Spec.Containers[0].Resources.Requests = tt.newRequests
tt.pod.Status.ContainerStatuses[0].AllocatedResources = v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}
kubelet.handlePodResourcesResize(tt.pod)
assert.Equal(t, tt.expectedAllocations, tt.pod.Status.ContainerStatuses[0].AllocatedResources, tt.name)
assert.Equal(t, tt.expectedResize, tt.pod.Status.Resize, tt.name)
updatedPod, found := kubelet.podManager.GetPodByName(tt.pod.Namespace, tt.pod.Name)
assert.True(t, found, "expected to find pod %s", tt.pod.Name)
assert.Equal(t, tt.expectedAllocations, updatedPod.Status.ContainerStatuses[0].AllocatedResources, tt.name)
assert.Equal(t, tt.expectedResize, updatedPod.Status.Resize, tt.name)
testKubelet.fakeKubeClient.ClearActions()
}
}
Expand Down

0 comments on commit c7cc788

Please sign in to comment.