Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Moving some force deletion logic from the NC into the PodGC #35541

Merged
merged 4 commits into from Oct 28, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 1 addition & 10 deletions pkg/controller/node/controller_utils.go
Expand Up @@ -135,12 +135,6 @@ func (nc *NodeController) maybeDeleteTerminatingPod(obj interface{}) {
return
}

// delete terminating pods that have not yet been scheduled
if len(pod.Spec.NodeName) == 0 {
utilruntime.HandleError(nc.forcefullyDeletePod(pod))
return
}

nodeObj, found, err := nc.nodeStore.Store.GetByKey(pod.Spec.NodeName)
if err != nil {
// this can only happen if the Store.KeyFunc has a problem creating
Expand All @@ -150,11 +144,8 @@ func (nc *NodeController) maybeDeleteTerminatingPod(obj interface{}) {
return
}

// delete terminating pods that have been scheduled on
// nonexistent nodes
// if there is no such node, do nothing and let the podGC clean it up.
if !found {
glog.Warningf("Unable to find Node: %v, deleting all assigned Pods.", pod.Spec.NodeName)
utilruntime.HandleError(nc.forcefullyDeletePod(pod))
return
}

Expand Down
4 changes: 2 additions & 2 deletions pkg/controller/node/nodecontroller_test.go
Expand Up @@ -1723,14 +1723,14 @@ func TestCheckPod(t *testing.T) {
ObjectMeta: api.ObjectMeta{DeletionTimestamp: &unversioned.Time{}},
Spec: api.PodSpec{NodeName: ""},
},
prune: true,
prune: false,
},
{
pod: api.Pod{
ObjectMeta: api.ObjectMeta{DeletionTimestamp: &unversioned.Time{}},
Spec: api.PodSpec{NodeName: "nonexistant"},
},
prune: true,
prune: false,
},
}

Expand Down
1 change: 1 addition & 0 deletions pkg/controller/podgc/BUILD
Expand Up @@ -43,6 +43,7 @@ go_test(
"//pkg/api/unversioned:go_default_library",
"//pkg/client/cache:go_default_library",
"//pkg/client/clientset_generated/internalclientset/fake:go_default_library",
"//pkg/labels:go_default_library",
"//pkg/util/sets:go_default_library",
],
)
23 changes: 21 additions & 2 deletions pkg/controller/podgc/gc_controller.go
Expand Up @@ -125,6 +125,7 @@ func (gcc *PodGCController) gc() {
gcc.gcTerminated(pods)
}
gcc.gcOrphaned(pods)
gcc.gcUnscheduledTerminating(pods)
}

func isPodTerminated(pod *api.Pod) bool {
Expand Down Expand Up @@ -168,7 +169,7 @@ func (gcc *PodGCController) gcTerminated(pods []*api.Pod) {
wait.Wait()
}

// cleanupOrphanedPods deletes pods that are bound to nodes that don't exist.
// gcOrphaned deletes pods that are bound to nodes that don't exist.
func (gcc *PodGCController) gcOrphaned(pods []*api.Pod) {
glog.V(4).Infof("GC'ing orphaned")

Expand All @@ -183,7 +184,25 @@ func (gcc *PodGCController) gcOrphaned(pods []*api.Pod) {
if err := gcc.deletePod(pod.Namespace, pod.Name); err != nil {
utilruntime.HandleError(err)
} else {
glog.V(4).Infof("Forced deletion of oprhaned Pod %s succeeded", pod.Name)
glog.V(0).Infof("Forced deletion of orphaned Pod %s succeeded", pod.Name)
}
}
}

// gcUnscheduledTerminating deletes pods that are terminating and haven't been scheduled to a particular node.
func (gcc *PodGCController) gcUnscheduledTerminating(pods []*api.Pod) {
glog.V(4).Infof("GC'ing unscheduled pods which are terminating.")

for _, pod := range pods {
if pod.DeletionTimestamp == nil || len(pod.Spec.NodeName) > 0 {
continue
}

glog.V(2).Infof("Found unscheduled terminating Pod %v not assigned to any Node. Deleting.", pod.Name)
if err := gcc.deletePod(pod.Namespace, pod.Name); err != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actually I'd rename this to 'forcefullyDeletePod', 'ungracefullyDeletPod' or something along those lines. Not necessarily in this PR:)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do in a follow-up.

utilruntime.HandleError(err)
} else {
glog.V(0).Infof("Forced deletion of unscheduled terminating Pod %s succeeded", pod.Name)
}
}
}
Expand Down
91 changes: 90 additions & 1 deletion pkg/controller/podgc/gc_controller_test.go
Expand Up @@ -25,6 +25,7 @@ import (
"k8s.io/kubernetes/pkg/api/unversioned"
"k8s.io/kubernetes/pkg/client/cache"
"k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/fake"
"k8s.io/kubernetes/pkg/labels"
"k8s.io/kubernetes/pkg/util/sets"
)

Expand Down Expand Up @@ -194,7 +195,12 @@ func TestGCOrphaned(t *testing.T) {
gcc.podController = &FakeController{}
gcc.nodeController = &FakeController{}

gcc.gc()
pods, err := gcc.podStore.List(labels.Everything())
if err != nil {
t.Errorf("Error while listing all Pods: %v", err)
return
}
gcc.gcOrphaned(pods)

pass := true
for _, pod := range deletedPodNames {
Expand All @@ -210,3 +216,86 @@ func TestGCOrphaned(t *testing.T) {
}
}
}

func TestGCUnscheduledTerminating(t *testing.T) {
type nameToPhase struct {
name string
phase api.PodPhase
deletionTimeStamp *unversioned.Time
nodeName string
}

testCases := []struct {
name string
pods []nameToPhase
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I generally find it useful to have a "name" or even better "description" per test case, which is printed with the failure, to make it easier to locate problematic test.

deletedPodNames sets.String
}{
{
name: "Unscheduled pod in any phase must be deleted",
pods: []nameToPhase{
{name: "a", phase: api.PodFailed, deletionTimeStamp: &unversioned.Time{}, nodeName: ""},
{name: "b", phase: api.PodSucceeded, deletionTimeStamp: &unversioned.Time{}, nodeName: ""},
{name: "c", phase: api.PodRunning, deletionTimeStamp: &unversioned.Time{}, nodeName: ""},
},
deletedPodNames: sets.NewString("a", "b", "c"),
},
{
name: "Scheduled pod in any phase must not be deleted",
pods: []nameToPhase{
{name: "a", phase: api.PodFailed, deletionTimeStamp: nil, nodeName: ""},
{name: "b", phase: api.PodSucceeded, deletionTimeStamp: nil, nodeName: "node"},
{name: "c", phase: api.PodRunning, deletionTimeStamp: &unversioned.Time{}, nodeName: "node"},
},
deletedPodNames: sets.NewString(),
},
}

for i, test := range testCases {
client := fake.NewSimpleClientset()
gcc := NewFromClient(client, -1)
deletedPodNames := make([]string, 0)
var lock sync.Mutex
gcc.deletePod = func(_, name string) error {
lock.Lock()
defer lock.Unlock()
deletedPodNames = append(deletedPodNames, name)
return nil
}

creationTime := time.Unix(0, 0)
for _, pod := range test.pods {
creationTime = creationTime.Add(1 * time.Hour)
gcc.podStore.Indexer.Add(&api.Pod{
ObjectMeta: api.ObjectMeta{Name: pod.name, CreationTimestamp: unversioned.Time{Time: creationTime},
DeletionTimestamp: pod.deletionTimeStamp},
Status: api.PodStatus{Phase: pod.phase},
Spec: api.PodSpec{NodeName: pod.nodeName},
})
}

store := cache.NewStore(cache.MetaNamespaceKeyFunc)
gcc.nodeStore = cache.StoreToNodeLister{Store: store}
gcc.podController = &FakeController{}
gcc.nodeController = &FakeController{}

pods, err := gcc.podStore.List(labels.Everything())
if err != nil {
t.Errorf("Error while listing all Pods: %v", err)
return
}
gcc.gcUnscheduledTerminating(pods)

pass := true
for _, pod := range deletedPodNames {
if !test.deletedPodNames.Has(pod) {
pass = false
}
}
if len(deletedPodNames) != len(test.deletedPodNames) {
pass = false
}
if !pass {
t.Errorf("[%v]pod's deleted expected and actual did not match.\n\texpected: %v\n\tactual: %v, test: %v", i, test.deletedPodNames, deletedPodNames, test.name)
}
}
}