Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow KillPod to take a gracePeriodOverride #24843

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 4 additions & 1 deletion pkg/kubelet/container/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,10 @@ type Runtime interface {
SyncPod(pod *api.Pod, apiPodStatus api.PodStatus, podStatus *PodStatus, pullSecrets []api.Secret, backOff *flowcontrol.Backoff) PodSyncResult
// KillPod kills all the containers of a pod. Pod may be nil, running pod must not be.
// TODO(random-liu): Return PodSyncResult in KillPod.
KillPod(pod *api.Pod, runningPod Pod) error
// gracePeriodOverride if specified allows the caller to override the pod default grace period.
// only hard kill paths are allowed to specify a gracePeriodOverride in the kubelet in order to not corrupt user data.
// it is useful when doing SIGKILL for hard eviction scenarios, or max grace period during soft eviction scenarios.
KillPod(pod *api.Pod, runningPod Pod, gracePeriodOverride *int64) error
// GetPodStatus retrieves the status of the pod, including the
// information of all containers in the pod that are visble in Runtime.
GetPodStatus(uid types.UID, name, namespace string) (*PodStatus, error)
Expand Down
2 changes: 1 addition & 1 deletion pkg/kubelet/container/testing/fake_runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ func (f *FakeRuntime) SyncPod(pod *api.Pod, _ api.PodStatus, _ *PodStatus, _ []a
return
}

func (f *FakeRuntime) KillPod(pod *api.Pod, runningPod Pod) error {
func (f *FakeRuntime) KillPod(pod *api.Pod, runningPod Pod, gracePeriodOverride *int64) error {
f.Lock()
defer f.Unlock()

Expand Down
4 changes: 2 additions & 2 deletions pkg/kubelet/container/testing/runtime_mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ func (r *Mock) SyncPod(pod *api.Pod, apiStatus api.PodStatus, status *PodStatus,
return args.Get(0).(PodSyncResult)
}

func (r *Mock) KillPod(pod *api.Pod, runningPod Pod) error {
args := r.Called(pod, runningPod)
func (r *Mock) KillPod(pod *api.Pod, runningPod Pod, gracePeriodOverride *int64) error {
args := r.Called(pod, runningPod, gracePeriodOverride)
return args.Error(0)
}

Expand Down
45 changes: 29 additions & 16 deletions pkg/kubelet/dockertools/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -1169,14 +1169,15 @@ func (dm *DockerManager) GetContainerIP(containerID, interfaceName string) (stri
// We can only deprecate this after refactoring kubelet.
// TODO(random-liu): After using pod status for KillPod(), we can also remove the kubernetesPodLabel, because all the needed information should have
// been extract from new labels and stored in pod status.
func (dm *DockerManager) KillPod(pod *api.Pod, runningPod kubecontainer.Pod) error {
result := dm.killPodWithSyncResult(pod, runningPod)
// only hard eviction scenarios should provide a grace period override, all other code paths must pass nil.
func (dm *DockerManager) KillPod(pod *api.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) error {
result := dm.killPodWithSyncResult(pod, runningPod, gracePeriodOverride)
return result.Error()
}

// TODO(random-liu): This is just a temporary function, will be removed when we acturally add PodSyncResult
// NOTE(random-liu): The pod passed in could be *nil* when kubelet restarted.
func (dm *DockerManager) killPodWithSyncResult(pod *api.Pod, runningPod kubecontainer.Pod) (result kubecontainer.PodSyncResult) {
func (dm *DockerManager) killPodWithSyncResult(pod *api.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) (result kubecontainer.PodSyncResult) {
// Send the kills in parallel since they may take a long time.
// There may be len(runningPod.Containers) or len(runningPod.Containers)-1 of result in the channel
containerResults := make(chan *kubecontainer.SyncResult, len(runningPod.Containers))
Expand Down Expand Up @@ -1212,7 +1213,7 @@ func (dm *DockerManager) killPodWithSyncResult(pod *api.Pod, runningPod kubecont
}

killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, container.Name)
err := dm.KillContainerInPod(container.ID, containerSpec, pod, "Need to kill pod.")
err := dm.KillContainerInPod(container.ID, containerSpec, pod, "Need to kill pod.", gracePeriodOverride)
if err != nil {
killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, runningPod.ID)
Expand Down Expand Up @@ -1245,7 +1246,7 @@ func (dm *DockerManager) killPodWithSyncResult(pod *api.Pod, runningPod kubecont
}
killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, networkContainer.Name)
result.AddSyncResult(killContainerResult)
if err := dm.KillContainerInPod(networkContainer.ID, networkSpec, pod, "Need to kill pod."); err != nil {
if err := dm.KillContainerInPod(networkContainer.ID, networkSpec, pod, "Need to kill pod.", gracePeriodOverride); err != nil {
killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, runningPod.ID)
}
Expand All @@ -1255,7 +1256,7 @@ func (dm *DockerManager) killPodWithSyncResult(pod *api.Pod, runningPod kubecont

// KillContainerInPod kills a container in the pod. It must be passed either a container ID or a container and pod,
// and will attempt to lookup the other information if missing.
func (dm *DockerManager) KillContainerInPod(containerID kubecontainer.ContainerID, container *api.Container, pod *api.Pod, message string) error {
func (dm *DockerManager) KillContainerInPod(containerID kubecontainer.ContainerID, container *api.Container, pod *api.Pod, message string, gracePeriodOverride *int64) error {
switch {
case containerID.IsEmpty():
// Locate the container.
Expand Down Expand Up @@ -1287,12 +1288,14 @@ func (dm *DockerManager) KillContainerInPod(containerID kubecontainer.ContainerI
pod = storedPod
}
}
return dm.killContainer(containerID, container, pod, message)
return dm.killContainer(containerID, container, pod, message, gracePeriodOverride)
}

// killContainer accepts a containerID and an optional container or pod containing shutdown policies. Invoke
// KillContainerInPod if information must be retrieved first.
func (dm *DockerManager) killContainer(containerID kubecontainer.ContainerID, container *api.Container, pod *api.Pod, reason string) error {
// KillContainerInPod if information must be retrieved first. It is only valid to provide a grace period override
// during hard eviction scenarios. All other code paths in kubelet must never provide a grace period override otherwise
// data corruption could occur in the end-user application.
func (dm *DockerManager) killContainer(containerID kubecontainer.ContainerID, container *api.Container, pod *api.Pod, reason string, gracePeriodOverride *int64) error {
ID := containerID.ID
name := ID
if container != nil {
Expand Down Expand Up @@ -1333,10 +1336,20 @@ func (dm *DockerManager) killContainer(containerID kubecontainer.ContainerID, co
gracePeriod -= int64(unversioned.Now().Sub(start.Time).Seconds())
}

// always give containers a minimal shutdown window to avoid unnecessary SIGKILLs
if gracePeriod < minimumGracePeriodInSeconds {
gracePeriod = minimumGracePeriodInSeconds
// if the caller did not specify a grace period override, we ensure that the grace period
// is not less than the minimal shutdown window to avoid unnecessary SIGKILLs. if a caller
// did provide an override, we always set the gracePeriod to that value. the only valid
// time to send an override is during eviction scenarios where we want to do a hard kill of
// a container because of resource exhaustion for incompressible resources (i.e. disk, memory).
if gracePeriodOverride == nil {
if gracePeriod < minimumGracePeriodInSeconds {
gracePeriod = minimumGracePeriodInSeconds
}
} else {
gracePeriod = *gracePeriodOverride
glog.V(2).Infof("Killing container %q, but using %d second grace period override", name, gracePeriod)
}

err := dm.client.StopContainer(ID, int(gracePeriod))
if err == nil {
glog.V(2).Infof("Container %q exited after %s", name, unversioned.Now().Sub(start.Time))
Expand Down Expand Up @@ -1460,7 +1473,7 @@ func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Containe
handlerErr := dm.runner.Run(id, pod, container, container.Lifecycle.PostStart)
if handlerErr != nil {
err := fmt.Errorf("PostStart handler: %v", handlerErr)
dm.KillContainerInPod(id, container, pod, err.Error())
dm.KillContainerInPod(id, container, pod, err.Error(), nil)
return kubecontainer.ContainerID{}, err
}
}
Expand Down Expand Up @@ -1795,7 +1808,7 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, _ api.PodStatus, podStatus *kubec

// Killing phase: if we want to start new infra container, or nothing is running kill everything (including infra container)
// TODO(random-liu): We'll use pod status directly in the future
killResult := dm.killPodWithSyncResult(pod, kubecontainer.ConvertPodStatusToRunningPod(podStatus))
killResult := dm.killPodWithSyncResult(pod, kubecontainer.ConvertPodStatusToRunningPod(podStatus), nil)
result.AddPodSyncResult(killResult)
if killResult.Error() != nil {
return
Expand All @@ -1819,7 +1832,7 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, _ api.PodStatus, podStatus *kubec
}
killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, containerStatus.Name)
result.AddSyncResult(killContainerResult)
if err := dm.KillContainerInPod(containerStatus.ID, podContainer, pod, killMessage); err != nil {
if err := dm.KillContainerInPod(containerStatus.ID, podContainer, pod, killMessage, nil); err != nil {
killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
glog.Errorf("Error killing container %q(id=%q) for pod %q: %v", containerStatus.Name, containerStatus.ID, format.Pod(pod), err)
return
Expand Down Expand Up @@ -1871,7 +1884,7 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, _ api.PodStatus, podStatus *kubec
result.AddSyncResult(killContainerResult)
if delErr := dm.KillContainerInPod(kubecontainer.ContainerID{
ID: string(podInfraContainerID),
Type: "docker"}, nil, pod, message); delErr != nil {
Type: "docker"}, nil, pod, message, nil); delErr != nil {
killContainerResult.Fail(kubecontainer.ErrKillContainer, delErr.Error())
glog.Warningf("Clear infra container failed for pod %q: %v", format.Pod(pod), delErr)
}
Expand Down
8 changes: 4 additions & 4 deletions pkg/kubelet/dockertools/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ func TestKillContainerInPod(t *testing.T) {

fakeDocker.SetFakeRunningContainers(containers)

if err := manager.KillContainerInPod(kubecontainer.ContainerID{}, &pod.Spec.Containers[0], pod, "test kill container in pod."); err != nil {
if err := manager.KillContainerInPod(kubecontainer.ContainerID{}, &pod.Spec.Containers[0], pod, "test kill container in pod.", nil); err != nil {
t.Errorf("unexpected error: %v", err)
}
// Assert the container has been stopped.
Expand Down Expand Up @@ -470,7 +470,7 @@ func TestKillContainerInPodWithPreStop(t *testing.T) {
containerToKill := containers[0]
fakeDocker.SetFakeRunningContainers(containers)

if err := manager.KillContainerInPod(kubecontainer.ContainerID{}, &pod.Spec.Containers[0], pod, "test kill container with preStop."); err != nil {
if err := manager.KillContainerInPod(kubecontainer.ContainerID{}, &pod.Spec.Containers[0], pod, "test kill container with preStop.", nil); err != nil {
t.Errorf("unexpected error: %v", err)
}
// Assert the container has been stopped.
Expand Down Expand Up @@ -507,7 +507,7 @@ func TestKillContainerInPodWithError(t *testing.T) {
fakeDocker.SetFakeRunningContainers(containers)
fakeDocker.InjectError("stop", fmt.Errorf("sample error"))

if err := manager.KillContainerInPod(kubecontainer.ContainerID{}, &pod.Spec.Containers[0], pod, "test kill container with error."); err == nil {
if err := manager.KillContainerInPod(kubecontainer.ContainerID{}, &pod.Spec.Containers[0], pod, "test kill container with error.", nil); err == nil {
t.Errorf("expected error, found nil")
}
}
Expand Down Expand Up @@ -1107,7 +1107,7 @@ func TestGetRestartCount(t *testing.T) {
if cs == nil {
t.Fatalf("Can't find status for container %q", containerName)
}
dm.KillContainerInPod(cs.ID, &pod.Spec.Containers[0], pod, "test container restart count.")
dm.KillContainerInPod(cs.ID, &pod.Spec.Containers[0], pod, "test container restart count.", nil)
}
// Container "bar" starts the first time.
// TODO: container lists are expected to be sorted reversely by time.
Expand Down
8 changes: 4 additions & 4 deletions pkg/kubelet/kubelet.go
Original file line number Diff line number Diff line change
Expand Up @@ -1649,14 +1649,14 @@ func parseResolvConf(reader io.Reader, dnsScrubber dnsScrubber) (nameservers []s

// One of the following aruguements must be non-nil: runningPod, status.
// TODO: Modify containerRuntime.KillPod() to accept the right arguments.
func (kl *Kubelet) killPod(pod *api.Pod, runningPod *kubecontainer.Pod, status *kubecontainer.PodStatus) error {
func (kl *Kubelet) killPod(pod *api.Pod, runningPod *kubecontainer.Pod, status *kubecontainer.PodStatus, gracePeriodOverride *int64) error {
var p kubecontainer.Pod
if runningPod != nil {
p = *runningPod
} else if status != nil {
p = kubecontainer.ConvertPodStatusToRunningPod(status)
}
return kl.containerRuntime.KillPod(pod, p)
return kl.containerRuntime.KillPod(pod, p, gracePeriodOverride)
}

// makePodDataDirs creates the dirs for the pod datas.
Expand Down Expand Up @@ -1734,7 +1734,7 @@ func (kl *Kubelet) syncPod(pod *api.Pod, mirrorPod *api.Pod, podStatus *kubecont

// Kill pod if it should not be running
if err := canRunPod(pod); err != nil || pod.DeletionTimestamp != nil || apiPodStatus.Phase == api.PodFailed {
if err := kl.killPod(pod, nil, podStatus); err != nil {
if err := kl.killPod(pod, nil, podStatus, nil); err != nil {
utilruntime.HandleError(err)
}
return err
Expand Down Expand Up @@ -2257,7 +2257,7 @@ func (kl *Kubelet) podKiller() {
ch <- runningPod.ID
}()
glog.V(2).Infof("Killing unwanted pod %q", runningPod.Name)
err := kl.killPod(apiPod, runningPod, nil)
err := kl.killPod(apiPod, runningPod, nil, nil)
if err != nil {
glog.Errorf("Failed killing the pod %q: %v", runningPod.Name, err)
}
Expand Down
7 changes: 4 additions & 3 deletions pkg/kubelet/rkt/rkt.go
Original file line number Diff line number Diff line change
Expand Up @@ -1038,7 +1038,7 @@ func (r *Runtime) RunPod(pod *api.Pod, pullSecrets []api.Secret) error {
// This is a temporary solution until we have a clean design on how
// kubelet handles events. See https://github.com/kubernetes/kubernetes/issues/23084.
if err := r.runLifecycleHooks(pod, runtimePod, lifecyclePostStartHook); err != nil {
if errKill := r.KillPod(pod, *runtimePod); errKill != nil {
if errKill := r.KillPod(pod, *runtimePod, nil); errKill != nil {
return errors.NewAggregate([]error{err, errKill})
}
return err
Expand Down Expand Up @@ -1282,7 +1282,8 @@ func (r *Runtime) waitPreStopHooks(pod *api.Pod, runningPod *kubecontainer.Pod)
}

// KillPod invokes 'systemctl kill' to kill the unit that runs the pod.
func (r *Runtime) KillPod(pod *api.Pod, runningPod kubecontainer.Pod) error {
// TODO: add support for gracePeriodOverride which is used in eviction scenarios
func (r *Runtime) KillPod(pod *api.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) error {
glog.V(4).Infof("Rkt is killing pod: name %q.", runningPod.Name)

if len(runningPod.Containers) == 0 {
Expand Down Expand Up @@ -1413,7 +1414,7 @@ func (r *Runtime) SyncPod(pod *api.Pod, podStatus api.PodStatus, internalPodStat
if restartPod {
// Kill the pod only if the pod is actually running.
if len(runningPod.Containers) > 0 {
if err = r.KillPod(pod, runningPod); err != nil {
if err = r.KillPod(pod, runningPod, nil); err != nil {
return
}
}
Expand Down