Use pod UID as cache key instead of namespace/name
UID uniquely identifies a pod across lifecycles, while a namespace/name
pair can refer to two different pods across lifecycles. Keying the
scheduler cache by namespace/name could therefore result in tricky
scheduler bugs.

Fixes kubernetes#60966
Yongkun Anfernee Gui authored and tossmilestone committed Jul 25, 2018
1 parent affcb1f commit 5ce4c61
Showing 7 changed files with 310 additions and 138 deletions.
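
To make the commit message concrete, here is a minimal, self-contained Go sketch (an editor's illustration with a hypothetical pod struct, not code from this diff) of how a namespace/name key collapses two pod lifecycles into one cache entry, while a UID key keeps them distinct:

// sketch.go — editor's illustration, not part of the commit
package main

import "fmt"

// pod is a hypothetical stand-in for v1.Pod; the real scheduler cache keys
// its pod bookkeeping by the string this commit changes from namespace/name
// to UID.
type pod struct {
	namespace, name, uid string
}

func main() {
	// Keyed by namespace/name: deleting and re-creating "default/web"
	// collides, so a stale assumed-pod entry can be confused with the
	// new incarnation of the pod.
	byName := map[string]pod{}
	old := pod{namespace: "default", name: "web", uid: "uid-1"}
	recreated := pod{namespace: "default", name: "web", uid: "uid-2"}
	byName[old.namespace+"/"+old.name] = old
	byName[recreated.namespace+"/"+recreated.name] = recreated
	fmt.Println(len(byName)) // 1 — two lifecycles collapsed into one entry

	// Keyed by UID: each lifecycle gets its own entry.
	byUID := map[string]pod{old.uid: old, recreated.uid: recreated}
	fmt.Println(len(byUID)) // 2
}
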
1 change: 1 addition & 0 deletions plugin/pkg/scheduler/BUILD
@@ -25,6 +25,7 @@ go_test(
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/diff:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
1 change: 1 addition & 0 deletions plugin/pkg/scheduler/core/BUILD
@@ -30,6 +30,7 @@ go_test(
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
],
326 changes: 206 additions & 120 deletions plugin/pkg/scheduler/core/generic_scheduler_test.go

Large diffs are not rendered by default.

42 changes: 27 additions & 15 deletions plugin/pkg/scheduler/scheduler_test.go
@@ -27,6 +27,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/diff"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
@@ -75,7 +76,11 @@ func (fp fakePodPreemptor) RemoveNominatedNodeAnnotation(pod *v1.Pod) error {

func podWithID(id, desiredHost string) *v1.Pod {
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: id, SelfLink: util.Test.SelfLink(string(v1.ResourcePods), id)},
ObjectMeta: metav1.ObjectMeta{
Name: id,
UID: types.UID(id),
SelfLink: util.Test.SelfLink(string(v1.ResourcePods), id),
},
Spec: v1.PodSpec{
NodeName: desiredHost,
},
@@ -85,7 +90,12 @@ func podWithID(id, desiredHost string) *v1.Pod {
func deletingPod(id string) *v1.Pod {
deletionTimestamp := metav1.Now()
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: id, SelfLink: util.Test.SelfLink(string(v1.ResourcePods), id), DeletionTimestamp: &deletionTimestamp},
ObjectMeta: metav1.ObjectMeta{
Name: id,
UID: types.UID(id),
SelfLink: util.Test.SelfLink(string(v1.ResourcePods), id),
DeletionTimestamp: &deletionTimestamp,
},
Spec: v1.PodSpec{
NodeName: "",
},
@@ -133,7 +143,7 @@ func TestScheduler(t *testing.T) {
eventBroadcaster.StartLogging(t.Logf).Stop()
errS := errors.New("scheduler")
errB := errors.New("binder")
testNode := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}}
testNode := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}}

table := []struct {
injectBindError error
@@ -149,7 +159,7 @@
{
sendPod: podWithID("foo", ""),
algo: mockScheduler{testNode.Name, nil},
expectBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo"}, Target: v1.ObjectReference{Kind: "Node", Name: testNode.Name}},
expectBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo", UID: types.UID("foo")}, Target: v1.ObjectReference{Kind: "Node", Name: testNode.Name}},
expectAssumedPod: podWithID("foo", testNode.Name),
eventReason: "Scheduled",
}, {
@@ -161,7 +171,7 @@
}, {
sendPod: podWithID("foo", ""),
algo: mockScheduler{testNode.Name, nil},
expectBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo"}, Target: v1.ObjectReference{Kind: "Node", Name: testNode.Name}},
expectBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo", UID: types.UID("foo")}, Target: v1.ObjectReference{Kind: "Node", Name: testNode.Name}},
expectAssumedPod: podWithID("foo", testNode.Name),
injectBindError: errB,
expectError: errB,
@@ -246,7 +256,7 @@ func TestSchedulerNoPhantomPodAfterExpire(t *testing.T) {
queuedPodStore := clientcache.NewFIFO(clientcache.MetaNamespaceKeyFunc)
scache := schedulercache.New(100*time.Millisecond, stop)
pod := podWithPort("pod.Name", "", 8080)
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}}
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}}
scache.AddNode(&node)
nodeLister := schedulertesting.FakeNodeLister([]*v1.Node{&node})
predicateMap := map[string]algorithm.FitPredicate{"PodFitsHostPorts": predicates.PodFitsHostPorts}
@@ -287,7 +297,7 @@ func TestSchedulerNoPhantomPodAfterExpire(t *testing.T) {
select {
case b := <-bindingChan:
expectBinding := &v1.Binding{
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
ObjectMeta: metav1.ObjectMeta{Name: "bar", UID: types.UID("bar")},
Target: v1.ObjectReference{Kind: "Node", Name: node.Name},
}
if !reflect.DeepEqual(expectBinding, b) {
@@ -304,7 +314,7 @@ func TestSchedulerNoPhantomPodAfterDelete(t *testing.T) {
queuedPodStore := clientcache.NewFIFO(clientcache.MetaNamespaceKeyFunc)
scache := schedulercache.New(10*time.Minute, stop)
firstPod := podWithPort("pod.Name", "", 8080)
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}}
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}}
scache.AddNode(&node)
nodeLister := schedulertesting.FakeNodeLister([]*v1.Node{&node})
predicateMap := map[string]algorithm.FitPredicate{"PodFitsHostPorts": predicates.PodFitsHostPorts}
@@ -348,7 +358,7 @@ func TestSchedulerNoPhantomPodAfterDelete(t *testing.T) {
select {
case b := <-bindingChan:
expectBinding := &v1.Binding{
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
ObjectMeta: metav1.ObjectMeta{Name: "bar", UID: types.UID("bar")},
Target: v1.ObjectReference{Kind: "Node", Name: node.Name},
}
if !reflect.DeepEqual(expectBinding, b) {
@@ -387,7 +397,7 @@ func TestSchedulerErrorWithLongBinding(t *testing.T) {
queuedPodStore := clientcache.NewFIFO(clientcache.MetaNamespaceKeyFunc)
scache := schedulercache.New(test.CacheTTL, stop)

node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}}
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}}
scache.AddNode(&node)

nodeLister := schedulertesting.FakeNodeLister([]*v1.Node{&node})
@@ -436,7 +446,7 @@ func setupTestSchedulerWithOnePodOnNode(t *testing.T, queuedPodStore *clientcach
select {
case b := <-bindingChan:
expectBinding := &v1.Binding{
ObjectMeta: metav1.ObjectMeta{Name: pod.Name},
ObjectMeta: metav1.ObjectMeta{Name: pod.Name, UID: types.UID(pod.Name)},
Target: v1.ObjectReference{Kind: "Node", Name: node.Name},
}
if !reflect.DeepEqual(expectBinding, b) {
@@ -468,8 +478,9 @@ func TestSchedulerFailedSchedulingReasons(t *testing.T) {
// create several nodes which cannot schedule the above pod
nodes := []*v1.Node{}
for i := 0; i < 100; i++ {
uid := fmt.Sprintf("machine%v", i)
node := v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("machine%v", i)},
ObjectMeta: metav1.ObjectMeta{Name: uid, UID: types.UID(uid)},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *(resource.NewQuantity(cpu/2, resource.DecimalSI)),
@@ -609,7 +620,7 @@ func setupTestSchedulerLongBindingWithRetry(queuedPodStore *clientcache.FIFO, sc
}

func setupTestSchedulerWithVolumeBinding(fakeVolumeBinder *volumebinder.VolumeBinder, stop <-chan struct{}, broadcaster record.EventBroadcaster) (*Scheduler, chan *v1.Binding, chan error) {
testNode := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}}
testNode := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}}
nodeLister := schedulertesting.FakeNodeLister([]*v1.Node{&testNode})
queuedPodStore := clientcache.NewFIFO(clientcache.MetaNamespaceKeyFunc)
queuedPodStore.Add(podWithID("foo", ""))
@@ -663,8 +674,9 @@ func TestSchedulerWithVolumeBinding(t *testing.T) {
FindBoundSatsified: true,
},
expectAssumeCalled: true,
expectPodBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo"}, Target: v1.ObjectReference{Kind: "Node", Name: "machine1"}},
eventReason: "Scheduled",
expectPodBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo", UID: types.UID("foo")}, Target: v1.ObjectReference{Kind: "Node", Name: "machine1"}},
eventReason: "Scheduled",
},
"bound,invalid-pv-affinity": {
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
2 changes: 1 addition & 1 deletion plugin/pkg/scheduler/schedulercache/BUILD
@@ -20,7 +20,6 @@ go_library(
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/client-go/tools/cache:go_default_library",
],
)

@@ -37,6 +36,7 @@ go_test(
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
],
)
68 changes: 68 additions & 0 deletions plugin/pkg/scheduler/schedulercache/cache_test.go
@@ -28,6 +28,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
schedutil "k8s.io/kubernetes/plugin/pkg/scheduler/util"
@@ -549,6 +550,69 @@ func TestExpireAddUpdatePod(t *testing.T) {
}
}

func makePodWithEphemeralStorage(nodeName, ephemeralStorage string) *v1.Pod {
req := v1.ResourceList{
v1.ResourceEphemeralStorage: resource.MustParse(ephemeralStorage),
}
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: "default-namespace",
Name: "pod-with-ephemeral-storage",
UID: types.UID("pod-with-ephemeral-storage"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{{
Resources: v1.ResourceRequirements{
Requests: req,
},
}},
NodeName: nodeName,
},
}
}

func TestEphemeralStorageResource(t *testing.T) {
nodeName := "node"
podE := makePodWithEphemeralStorage(nodeName, "500")
tests := []struct {
pod *v1.Pod
wNodeInfo *NodeInfo
}{
{
pod: podE,
wNodeInfo: &NodeInfo{
requestedResource: &Resource{
EphemeralStorage: 500,
},
nonzeroRequest: &Resource{
MilliCPU: priorityutil.DefaultMilliCPURequest,
Memory: priorityutil.DefaultMemoryRequest,
},
allocatableResource: &Resource{},
pods: []*v1.Pod{podE},
usedPorts: schedutil.HostPortInfo{},
},
},
}
for i, tt := range tests {
cache := newSchedulerCache(time.Second, time.Second, nil)
if err := cache.AddPod(tt.pod); err != nil {
t.Fatalf("AddPod failed: %v", err)
}
n := cache.nodes[nodeName]
deepEqualWithoutGeneration(t, i, n, tt.wNodeInfo)

if err := cache.RemovePod(tt.pod); err != nil {
t.Fatalf("RemovePod failed: %v", err)
}

n = cache.nodes[nodeName]
if n != nil {
t.Errorf("#%d: expecting pod deleted and nil node info, get=%s", i, n)
}
}
}

// TestRemovePod tests after added pod is removed, its information should also be subtracted.
func TestRemovePod(t *testing.T) {
nodeName := "node"
@@ -721,6 +785,7 @@ func TestNodeOperators(t *testing.T) {
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod1",
UID: types.UID("pod1"),
},
Spec: v1.PodSpec{
NodeName: nodeName,
@@ -771,6 +836,7 @@ func TestNodeOperators(t *testing.T) {
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod1",
UID: types.UID("pod1"),
},
Spec: v1.PodSpec{
NodeName: nodeName,
@@ -789,6 +855,7 @@
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod2",
UID: types.UID("pod2"),
},
Spec: v1.PodSpec{
NodeName: nodeName,
@@ -912,6 +979,7 @@ func makeBasePod(t testingMode, nodeName, objName, cpu, mem, extended string, po
}
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: types.UID(objName),
Namespace: "node_info_cache_test",
Name: objName,
},
8 changes: 6 additions & 2 deletions plugin/pkg/scheduler/schedulercache/node_info.go
@@ -17,13 +17,13 @@ limitations under the License.
package schedulercache

import (
"errors"
"fmt"

"github.com/golang/glog"

"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
clientcache "k8s.io/client-go/tools/cache"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
"k8s.io/kubernetes/plugin/pkg/scheduler/util"
@@ -503,7 +503,11 @@ func (n *NodeInfo) FilterOutPods(pods []*v1.Pod) []*v1.Pod {

// getPodKey returns the string key of a pod.
func getPodKey(pod *v1.Pod) (string, error) {
return clientcache.MetaNamespaceKeyFunc(pod)
uid := string(pod.UID)
if len(uid) == 0 {
return "", errors.New("Cannot get cache key for pod with empty UID")
}
return uid, nil
}

// Filter implements PodFilter interface. It returns false only if the pod node name
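
The getPodKey change above is the heart of the commit; the remaining hunks wire UIDs into test fixtures so the new key can always be computed. As a usage sketch (an editor's illustration: getPodKey is unexported, so this re-implements its logic rather than importing the scheduler package; the v1, metav1, and types imports are the same ones the diff uses):

// keysketch.go — editor's illustration, not part of the commit
package main

import (
	"errors"
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
)

// podKey mirrors the logic of the getPodKey introduced by this commit.
func podKey(pod *v1.Pod) (string, error) {
	if len(pod.UID) == 0 {
		return "", errors.New("cannot get cache key for pod with empty UID")
	}
	return string(pod.UID), nil
}

func main() {
	// Two lifecycles of the same namespace/name, as after a delete + re-create.
	first := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "default", Name: "web", UID: types.UID("uid-1")}}
	second := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "default", Name: "web", UID: types.UID("uid-2")}}

	k1, _ := podKey(first)
	k2, _ := podKey(second)
	fmt.Println(k1 != k2) // true: the cache can no longer confuse the two pods

	// A pod built without a UID is now rejected — which is why every test
	// fixture in this diff gains a UID field.
	if _, err := podKey(&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "no-uid"}}); err != nil {
		fmt.Println("error:", err)
	}
}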
