Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better choices of what pods to kill #22078

Merged
merged 2 commits into from Feb 28, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
61 changes: 54 additions & 7 deletions pkg/controller/controller_utils.go
Expand Up @@ -34,6 +34,7 @@ import (
"k8s.io/kubernetes/pkg/labels"
"k8s.io/kubernetes/pkg/runtime"
"k8s.io/kubernetes/pkg/util"
"k8s.io/kubernetes/pkg/util/integer"
)

const CreatedByAnnotation = "kubernetes.io/created-by"
Expand Down Expand Up @@ -409,22 +410,68 @@ func (s ActivePods) Len() int { return len(s) }
func (s ActivePods) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

func (s ActivePods) Less(i, j int) bool {
// Unassigned < assigned
if s[i].Spec.NodeName == "" && s[j].Spec.NodeName != "" {
return true
// 1. Unassigned < assigned
// If only one of the pods is unassigned, the unassigned one is smaller
if s[i].Spec.NodeName != s[j].Spec.NodeName && (len(s[i].Spec.NodeName) == 0 || len(s[j].Spec.NodeName) == 0) {
return len(s[i].Spec.NodeName) == 0
}
// PodPending < PodUnknown < PodRunning
// 2. PodPending < PodUnknown < PodRunning
m := map[api.PodPhase]int{api.PodPending: 0, api.PodUnknown: 1, api.PodRunning: 2}
if m[s[i].Status.Phase] != m[s[j].Status.Phase] {
return m[s[i].Status.Phase] < m[s[j].Status.Phase]
}
// Not ready < ready
if !api.IsPodReady(s[i]) && api.IsPodReady(s[j]) {
return true
// 3. Not ready < ready
// If only one of the pods is not ready, the not ready one is smaller
if api.IsPodReady(s[i]) != api.IsPodReady(s[j]) {
return !api.IsPodReady(s[i])
}
// TODO: take availability into account when we push minReadySeconds information from deployment into pods,
// see https://github.com/kubernetes/kubernetes/issues/22065
// 4. Been ready for empty time < less time < more time
// If both pods are ready, the latest ready one is smaller
if api.IsPodReady(s[i]) && api.IsPodReady(s[j]) && !podReadyTime(s[i]).Equal(podReadyTime(s[j])) {
return afterOrZero(podReadyTime(s[i]), podReadyTime(s[j]))
}
// 5. Pods with containers with higher restart counts < lower restart counts
if maxContainerRestarts(s[i]) != maxContainerRestarts(s[j]) {
return maxContainerRestarts(s[i]) > maxContainerRestarts(s[j])
}
// 6. Empty creation time pods < newer pods < older pods
if !s[i].CreationTimestamp.Equal(s[j].CreationTimestamp) {
return afterOrZero(s[i].CreationTimestamp, s[j].CreationTimestamp)
}
return false
}

// afterOrZero checks if time t1 is after time t2; if one of them
// is zero, the zero time is seen as after non-zero time.
func afterOrZero(t1, t2 unversioned.Time) bool {
if t1.Time.IsZero() || t2.Time.IsZero() {
return t1.Time.IsZero()
}
return t1.After(t2.Time)
}

func podReadyTime(pod *api.Pod) unversioned.Time {
if api.IsPodReady(pod) {
for _, c := range pod.Status.Conditions {
// we only care about pod ready conditions
if c.Type == api.PodReady && c.Status == api.ConditionTrue {
return c.LastTransitionTime
}
}
}
return unversioned.Time{}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is empty time 0 or infinity with respect to After/Before comparisons?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Empty time 0 is always before non-empty time (and vise versa). Should check if the time is empty in step 4.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bgrant0607 Done in the 2nd commit. PTAL

}

func maxContainerRestarts(pod *api.Pod) int {
maxRestarts := 0
for _, c := range pod.Status.ContainerStatuses {
maxRestarts = integer.IntMax(maxRestarts, c.RestartCount)
}
return maxRestarts
}

// FilterActivePods returns pods that have not terminated.
func FilterActivePods(pods []api.Pod) []*api.Pod {
var result []*api.Pod
Expand Down
29 changes: 27 additions & 2 deletions pkg/controller/controller_utils_test.go
Expand Up @@ -245,7 +245,7 @@ func TestActivePodFiltering(t *testing.T) {
}

func TestSortingActivePods(t *testing.T) {
numPods := 5
numPods := 9
// This rc is not needed by the test, only the newPodList to give the pods labels/a namespace.
rc := newReplicationController(0)
podList := newPodList(nil, numPods, api.PodRunning, rc)
Expand All @@ -266,10 +266,35 @@ func TestSortingActivePods(t *testing.T) {
// pods[3] is running but not ready.
pods[3].Spec.NodeName = "foo"
pods[3].Status.Phase = api.PodRunning
// pods[4] is running and ready.
// pods[4] is running and ready but without LastTransitionTime.
now := unversioned.Now()
pods[4].Spec.NodeName = "foo"
pods[4].Status.Phase = api.PodRunning
pods[4].Status.Conditions = []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue}}
pods[4].Status.ContainerStatuses = []api.ContainerStatus{{RestartCount: 3}, {RestartCount: 0}}
// pods[5] is running and ready and with LastTransitionTime.
pods[5].Spec.NodeName = "foo"
pods[5].Status.Phase = api.PodRunning
pods[5].Status.Conditions = []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue, LastTransitionTime: now}}
pods[5].Status.ContainerStatuses = []api.ContainerStatus{{RestartCount: 3}, {RestartCount: 0}}
// pods[6] is running ready for a longer time than pods[5].
then := unversioned.Time{Time: now.AddDate(0, -1, 0)}
pods[6].Spec.NodeName = "foo"
pods[6].Status.Phase = api.PodRunning
pods[6].Status.Conditions = []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue, LastTransitionTime: then}}
pods[6].Status.ContainerStatuses = []api.ContainerStatus{{RestartCount: 3}, {RestartCount: 0}}
// pods[7] has lower container restart count than pods[6].
pods[7].Spec.NodeName = "foo"
pods[7].Status.Phase = api.PodRunning
pods[7].Status.Conditions = []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue, LastTransitionTime: then}}
pods[7].Status.ContainerStatuses = []api.ContainerStatus{{RestartCount: 2}, {RestartCount: 1}}
pods[7].CreationTimestamp = now
// pods[8] is older than pods[7].
pods[8].Spec.NodeName = "foo"
pods[8].Status.Phase = api.PodRunning
pods[8].Status.Conditions = []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue, LastTransitionTime: then}}
pods[8].Status.ContainerStatuses = []api.ContainerStatus{{RestartCount: 2}, {RestartCount: 1}}
pods[8].CreationTimestamp = then

getOrder := func(pods []*api.Pod) []string {
names := make([]string, len(pods))
Expand Down