Ensure deletion of pods in queues and cache
When the client misses a delete event from the watcher, it will use the last state of the pod in the informer cache to produce a delete event. At that point, it's not clear if the pod was in the queues or the cache, so we should issue a deletion in both.

The pod could be assumed, so deletion of assumed pods from the cache should work.

Change-Id: I11ce9785de603924fc121fe2fa6ed5cb1e16922f
alculquicondor committed Nov 25, 2021
1 parent 8fa0073 commit edaf660
Showing 3 changed files with 57 additions and 51 deletions.
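Background for the commit message above: when an informer misses a delete event, client-go synthesizes one from the last state it saw and delivers it wrapped in a cache.DeletedFinalStateUnknown tombstone, whose carried object may be stale. The following standalone sketch is illustrative only and is not part of the commit; handlePodDelete and the print statement are hypothetical stand-ins for the scheduler's real cleanup of its cache and queues.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/tools/cache"
)

// handlePodDelete shows the shape of a delete handler that may receive either
// a live *v1.Pod or a cache.DeletedFinalStateUnknown tombstone when the watch
// missed the real delete and only the informer's last known state is left.
func handlePodDelete(obj interface{}) {
	var pod *v1.Pod
	switch t := obj.(type) {
	case *v1.Pod:
		pod = t // the delete event was observed directly
	case cache.DeletedFinalStateUnknown:
		// The tombstone carries the last state the informer saw, which may be
		// stale; fields such as Spec.NodeName can no longer be trusted.
		p, ok := t.Obj.(*v1.Pod)
		if !ok {
			return
		}
		pod = p
	default:
		return
	}
	// Because the object may be stale, clean up in both the scheduling queues
	// and the scheduler cache instead of guessing where the pod currently is.
	fmt.Printf("deleting pod %s/%s from queues and cache\n", pod.Namespace, pod.Name)
}

func main() {
	pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "default", Name: "test"}}
	// Simulate a missed delete: the handler gets a tombstone, not the pod itself.
	handlePodDelete(cache.DeletedFinalStateUnknown{Key: "default/test", Obj: pod})
}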
10 changes: 7 additions & 3 deletions pkg/scheduler/eventhandlers.go
@@ -280,8 +280,10 @@ func addAllEventHandlers(
 				case *v1.Pod:
 					return assignedPod(t)
 				case cache.DeletedFinalStateUnknown:
-					if pod, ok := t.Obj.(*v1.Pod); ok {
-						return assignedPod(pod)
+					if _, ok := t.Obj.(*v1.Pod); ok {
+						// The carried object may be stale, so we don't use it to check if
+						// it's assigned or not. Attempting to cleanup anyways.
+						return true
 					}
 					utilruntime.HandleError(fmt.Errorf("unable to convert object %T to *v1.Pod in %T", obj, sched))
 					return false
@@ -306,7 +308,9 @@ func addAllEventHandlers(
 					return !assignedPod(t) && responsibleForPod(t, sched.Profiles)
 				case cache.DeletedFinalStateUnknown:
 					if pod, ok := t.Obj.(*v1.Pod); ok {
-						return !assignedPod(pod) && responsibleForPod(pod, sched.Profiles)
+						// The carried object may be stale, so we don't use it to check if
+						// it's assigned or not.
+						return responsibleForPod(pod, sched.Profiles)
 					}
 					utilruntime.HandleError(fmt.Errorf("unable to convert object %T to *v1.Pod in %T", obj, sched))
 					return false
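The two filters above are FilterFunc bodies. As a rough sketch of how such a filter is wired into an informer (not the commit's code; addAssignedPodDeleteHandler and onDelete are invented names), client-go's FilteringResourceEventHandler forwards an event to its Handler only when FilterFunc returns true, which is why accepting a possibly stale tombstone keeps the delete callback running so it can attempt cleanup in both the cache and the queues.

package main

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/client-go/tools/cache"
)

// addAssignedPodDeleteHandler wires a filter like the one in the diff above
// into an informer. FilteringResourceEventHandler calls Handler only when
// FilterFunc returns true, so accepting a stale tombstone keeps DeleteFunc
// running and lets it attempt cleanup in both the cache and the queues.
func addAssignedPodDeleteHandler(informer cache.SharedIndexInformer, onDelete func(obj interface{})) {
	informer.AddEventHandler(cache.FilteringResourceEventHandler{
		FilterFunc: func(obj interface{}) bool {
			switch t := obj.(type) {
			case *v1.Pod:
				return t.Spec.NodeName != "" // pod is already assigned to a node
			case cache.DeletedFinalStateUnknown:
				// The carried object may be stale, so its NodeName is not a
				// reliable signal; let the delete handler decide what to do.
				_, ok := t.Obj.(*v1.Pod)
				return ok
			default:
				return false
			}
		},
		Handler: cache.ResourceEventHandlerFuncs{
			DeleteFunc: onDelete,
		},
	})
}

// main is a placeholder so this sketch builds as a standalone file.
func main() {}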
17 changes: 7 additions & 10 deletions pkg/scheduler/internal/cache/cache.go
@@ -537,22 +537,19 @@ func (cache *schedulerCache) RemovePod(pod *v1.Pod) error {
 
 	currState, ok := cache.podStates[key]
 	switch {
-	// An assumed pod won't have Delete/Remove event. It needs to have Add event
-	// before Remove event, in which case the state would change from Assumed to Added.
-	case ok && !cache.assumedPods.Has(key):
+	case ok:
 		if currState.pod.Spec.NodeName != pod.Spec.NodeName {
 			klog.Errorf("Pod %v was assumed to be on %v but got added to %v", key, pod.Spec.NodeName, currState.pod.Spec.NodeName)
-			klog.Fatalf("Schedulercache is corrupted and can badly affect scheduling decisions")
-		}
-		err := cache.removePod(currState.pod)
-		if err != nil {
-			return err
+			if pod.Spec.NodeName != "" {
+				// An empty NodeName is possible when the scheduler misses a Delete
+				// event and it gets the last known state from the informer cache.
+				klog.Fatalf("Schedulercache is corrupted and can badly affect scheduling decisions")
+			}
 		}
-		delete(cache.podStates, key)
+		return cache.expirePod(key, currState)
 	default:
 		return fmt.Errorf("pod %v is not found in scheduler cache, so cannot be removed from it", key)
 	}
-	return nil
 }
 
 func (cache *schedulerCache) IsAssumedPod(pod *v1.Pod) (bool, error) {
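Why a single "case ok:" branch now covers both assumed and bound pods: the change relies on expirePod-style cleanup clearing the pod from both the assumed set and the pod-state map, and deleting a key that is absent from a Go map is a no-op. The toy model below is an assumption-laden sketch, not the scheduler's real types.

package main

import "fmt"

// miniCache is a deliberately simplified stand-in for the scheduler cache; it
// assumes the real expirePod clears both maps, which is what lets one branch
// handle assumed and bound pods alike.
type miniCache struct {
	podStates   map[string]string // pod key -> node name (stands in for *podState)
	assumedPods map[string]bool   // pod keys that are assumed but not yet confirmed
}

// removePod mirrors the shape of the new RemovePod: any known pod is cleaned
// up from both maps; deleting a key that is not present is a no-op in Go.
func (c *miniCache) removePod(key string) error {
	if _, ok := c.podStates[key]; !ok {
		return fmt.Errorf("pod %v is not found in scheduler cache, so cannot be removed from it", key)
	}
	delete(c.assumedPods, key)
	delete(c.podStates, key)
	return nil
}

func main() {
	c := &miniCache{
		podStates:   map[string]string{"ns/assumed-pod": "node-1", "ns/bound-pod": "node-2"},
		assumedPods: map[string]bool{"ns/assumed-pod": true},
	}
	for _, key := range []string{"ns/assumed-pod", "ns/bound-pod"} {
		fmt.Printf("remove %s: %v\n", key, c.removePod(key)) // both print <nil>
	}
}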
81 changes: 43 additions & 38 deletions pkg/scheduler/internal/cache/cache_test.go
@@ -807,57 +807,62 @@ func TestEphemeralStorageResource(t *testing.T) {
 
 // TestRemovePod tests after added pod is removed, its information should also be subtracted.
 func TestRemovePod(t *testing.T) {
-	basePod := makeBasePod(t, "node-1", "test", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}})
-	tests := []struct {
-		nodes     []*v1.Node
-		pod       *v1.Pod
-		wNodeInfo *framework.NodeInfo
-	}{{
-		nodes: []*v1.Node{
-			{
-				ObjectMeta: metav1.ObjectMeta{Name: "node-1"},
-			},
-			{
-				ObjectMeta: metav1.ObjectMeta{Name: "node-2"},
-			},
+	pod := makeBasePod(t, "node-1", "test", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}})
+	nodes := []*v1.Node{
+		{
+			ObjectMeta: metav1.ObjectMeta{Name: "node-1"},
 		},
-		pod: basePod,
-		wNodeInfo: newNodeInfo(
-			&framework.Resource{
-				MilliCPU: 100,
-				Memory:   500,
-			},
-			&framework.Resource{
-				MilliCPU: 100,
-				Memory:   500,
-			},
-			[]*v1.Pod{basePod},
-			newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
-			make(map[string]*framework.ImageStateSummary),
-		),
-	}}
+		{
+			ObjectMeta: metav1.ObjectMeta{Name: "node-2"},
+		},
+	}
+	wNodeInfo := newNodeInfo(
+		&framework.Resource{
+			MilliCPU: 100,
+			Memory:   500,
+		},
+		&framework.Resource{
+			MilliCPU: 100,
+			Memory:   500,
+		},
+		[]*v1.Pod{pod},
+		newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
+		make(map[string]*framework.ImageStateSummary),
+	)
+	tests := map[string]struct {
+		assume bool
+	}{
+		"bound":   {},
+		"assumed": {assume: true},
+	}
 
-	for i, tt := range tests {
-		t.Run(fmt.Sprintf("case_%d", i), func(t *testing.T) {
-			nodeName := tt.pod.Spec.NodeName
+	for name, tt := range tests {
+		t.Run(name, func(t *testing.T) {
+			nodeName := pod.Spec.NodeName
 			cache := newSchedulerCache(time.Second, time.Second, nil)
-			// Add pod succeeds even before adding the nodes.
-			if err := cache.AddPod(tt.pod); err != nil {
-				t.Fatalf("AddPod failed: %v", err)
+			// Add/Assume pod succeeds even before adding the nodes.
+			if tt.assume {
+				if err := cache.AddPod(pod); err != nil {
+					t.Fatalf("AddPod failed: %v", err)
+				}
+			} else {
+				if err := cache.AssumePod(pod); err != nil {
+					t.Fatalf("AssumePod failed: %v", err)
+				}
 			}
 			n := cache.nodes[nodeName]
-			if err := deepEqualWithoutGeneration(n, tt.wNodeInfo); err != nil {
+			if err := deepEqualWithoutGeneration(n, wNodeInfo); err != nil {
 				t.Error(err)
 			}
-			for _, n := range tt.nodes {
+			for _, n := range nodes {
 				cache.AddNode(n)
 			}
 
-			if err := cache.RemovePod(tt.pod); err != nil {
+			if err := cache.RemovePod(pod); err != nil {
 				t.Fatalf("RemovePod failed: %v", err)
 			}
 
-			if _, err := cache.GetPod(tt.pod); err == nil {
+			if _, err := cache.GetPod(pod); err == nil {
 				t.Errorf("pod was not deleted")
 			}
 