Moved node condition filter into a predicate. #50362

Merged: 2 commits, Aug 12, 2017

This PR moves the node-condition filtering that previously lived in the scheduler factory's node lister into a dedicated fit predicate, CheckNodeCondition, and registers that predicate as mandatory so it is applied regardless of the configured policy.
4 changes: 4 additions & 0 deletions plugin/pkg/scheduler/algorithm/predicates/error.go
@@ -38,6 +38,10 @@ var (
ErrNodeUnderMemoryPressure = newPredicateFailureError("NodeUnderMemoryPressure")
ErrNodeUnderDiskPressure = newPredicateFailureError("NodeUnderDiskPressure")
ErrNodeOutOfDisk = newPredicateFailureError("NodeOutOfDisk")
ErrNodeNotReady = newPredicateFailureError("NodeNotReady")
ErrNodeNetworkUnavailable = newPredicateFailureError("NodeNetworkUnavailable")
ErrNodeUnschedulable = newPredicateFailureError("NodeUnschedulable")
ErrNodeUnknownCondition = newPredicateFailureError("NodeUnknownCondition")
ErrVolumeNodeConflict = newPredicateFailureError("NoVolumeNodeConflict")
// ErrFakePredicate is used for test only. The fake predicates returning false also returns error
// as ErrFakePredicate.
31 changes: 31 additions & 0 deletions plugin/pkg/scheduler/algorithm/predicates/predicates.go
@@ -1301,6 +1301,37 @@ func CheckNodeDiskPressurePredicate(pod *v1.Pod, meta interface{}, nodeInfo *sch
return true, nil, nil
}

// CheckNodeConditionPredicate checks whether a pod can be scheduled on a node reporting an
// out-of-disk, network-unavailable, or not-ready condition. Only node conditions are taken
// into account by this predicate.
func CheckNodeConditionPredicate(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
reasons := []algorithm.PredicateFailureReason{}

if nodeInfo == nil || nodeInfo.Node() == nil {
return false, []algorithm.PredicateFailureReason{ErrNodeUnknownCondition}, nil
}

node := nodeInfo.Node()
for _, cond := range node.Status.Conditions {
// We consider the node for scheduling only when:
// - NodeReady condition status is ConditionTrue,
// - NodeOutOfDisk condition status is ConditionFalse,
// - NodeNetworkUnavailable condition status is ConditionFalse.
if cond.Type == v1.NodeReady && cond.Status != v1.ConditionTrue {
Review thread (on the NodeReady check):

yastij (Member), Aug 9, 2017: I find cond.Type == v1.NodeReady && cond.Status == v1.ConditionFalse easier to read, WDYT @k82cn? (same goes for the checks below)

Author (Member): we can only schedule a pod to a ready node, so the status may be neither False nor Unknown.

yastij (Member): yup, indeed, I forgot the Unknown case.
reasons = append(reasons, ErrNodeNotReady)
} else if cond.Type == v1.NodeOutOfDisk && cond.Status != v1.ConditionFalse {
reasons = append(reasons, ErrNodeOutOfDisk)
} else if cond.Type == v1.NodeNetworkUnavailable && cond.Status != v1.ConditionFalse {
reasons = append(reasons, ErrNodeNetworkUnavailable)
}
}

if node.Spec.Unschedulable {
reasons = append(reasons, ErrNodeUnschedulable)
}

return len(reasons) == 0, reasons, nil
}

type VolumeNodeChecker struct {
pvInfo PersistentVolumeInfo
pvcInfo PersistentVolumeClaimInfo
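To make the review point above concrete: because each check compares against ConditionTrue or ConditionFalse rather than enumerating statuses, an Unknown status fails all three checks. Below is a minimal standalone sketch, not part of this PR, that mirrors the predicate's decision logic using only the core API types (the schedulable helper is illustrative, and the import paths assume the post-1.7 k8s.io/api split):

package main

import (
	"fmt"

	"k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// schedulable mirrors CheckNodeConditionPredicate's decision table:
// NodeReady must be True, NodeOutOfDisk and NodeNetworkUnavailable must
// be False, and Spec.Unschedulable must be unset. Any other status,
// including Unknown, rejects the node.
func schedulable(node *v1.Node) (bool, []string) {
	var reasons []string
	for _, cond := range node.Status.Conditions {
		if cond.Type == v1.NodeReady && cond.Status != v1.ConditionTrue {
			reasons = append(reasons, "NodeNotReady")
		} else if cond.Type == v1.NodeOutOfDisk && cond.Status != v1.ConditionFalse {
			reasons = append(reasons, "NodeOutOfDisk")
		} else if cond.Type == v1.NodeNetworkUnavailable && cond.Status != v1.ConditionFalse {
			reasons = append(reasons, "NodeNetworkUnavailable")
		}
	}
	if node.Spec.Unschedulable {
		reasons = append(reasons, "NodeUnschedulable")
	}
	return len(reasons) == 0, reasons
}

func main() {
	// Readiness Unknown is rejected: != ConditionTrue covers both
	// False and Unknown, which is the point raised in the review thread.
	node := &v1.Node{
		ObjectMeta: metav1.ObjectMeta{Name: "node-a"},
		Status: v1.NodeStatus{Conditions: []v1.NodeCondition{
			{Type: v1.NodeReady, Status: v1.ConditionUnknown},
		}},
	}
	fit, reasons := schedulable(node)
	fmt.Println(fit, reasons) // false [NodeNotReady]
}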
72 changes: 72 additions & 0 deletions plugin/pkg/scheduler/algorithm/predicates/predicates_test.go
@@ -3442,6 +3442,78 @@ func TestPodSchedulesOnNodeWithDiskPressureCondition(t *testing.T) {
}
}

func TestNodeConditionPredicate(t *testing.T) {
tests := []struct {
node *v1.Node
schedulable bool
}{
// node1 considered
{
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node1"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue}}}},
schedulable: true,
},
// node2 ignored - node not Ready
{
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node2"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}}}},
schedulable: false,
},
// node3 ignored - node out of disk
{
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node3"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}}},
schedulable: false,
},

// node4 considered
{
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node4"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeOutOfDisk, Status: v1.ConditionFalse}}}},
schedulable: true,
},
// node5 ignored - node out of disk
{
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node5"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}}},
schedulable: false,
},
// node6 considered
{
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node6"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionFalse}}}},
schedulable: true,
},
// node7 ignored - node out of disk, node not Ready
{
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node7"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}}},
schedulable: false,
},
// node8 ignored - node not Ready
{
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node8"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionFalse}}}},
schedulable: false,
},
// node9 ignored - node unschedulable
{
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node9"}, Spec: v1.NodeSpec{Unschedulable: true}},
schedulable: false,
},
// node10 considered
{
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node10"}, Spec: v1.NodeSpec{Unschedulable: false}},
schedulable: true,
},
// node11 considered
{
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node11"}},
schedulable: true,
},
}

for _, test := range tests {
nodeInfo := makeEmptyNodeInfo(test.node)
if fit, reasons, err := CheckNodeConditionPredicate(nil, nil, nodeInfo); fit != test.schedulable {
t.Errorf("%s: expected: %t, got %t; %+v, %v",
test.node.Name, test.schedulable, fit, reasons, err)
}
}
}

func createPodWithVolume(pod, pv, pvc string) *v1.Pod {
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: pod, Namespace: "default"},
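To exercise just the new table-driven test, a scoped run of the package's tests should suffice, e.g. go test ./plugin/pkg/scheduler/algorithm/predicates/ -run TestNodeConditionPredicate from the repository root (illustrative invocation, not taken from this PR).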
@@ -387,6 +387,79 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
},
},
},
// Do not change this JSON after the corresponding release has been tagged.
// A failure indicates backwards compatibility with the specified release was broken.
"1.8": {
JSON: `{
"kind": "Policy",
"apiVersion": "v1",
"predicates": [
{"name": "MatchNodeSelector"},
{"name": "PodFitsResources"},
{"name": "PodFitsHostPorts"},
{"name": "HostName"},
{"name": "NoDiskConflict"},
{"name": "NoVolumeZoneConflict"},
{"name": "PodToleratesNodeTaints"},
{"name": "CheckNodeMemoryPressure"},
{"name": "CheckNodeDiskPressure"},
{"name": "CheckNodeCondition"},
{"name": "MaxEBSVolumeCount"},
{"name": "MaxGCEPDVolumeCount"},
{"name": "MaxAzureDiskVolumeCount"},
{"name": "MatchInterPodAffinity"},
{"name": "GeneralPredicates"},
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
{"name": "TestLabelsPresence", "argument": {"labelsPresence" : {"labels" : ["foo"], "presence":true}}},
{"name": "NoVolumeNodeConflict"}
],"priorities": [
{"name": "EqualPriority", "weight": 2},
{"name": "ImageLocalityPriority", "weight": 2},
{"name": "LeastRequestedPriority", "weight": 2},
{"name": "BalancedResourceAllocation", "weight": 2},
{"name": "SelectorSpreadPriority", "weight": 2},
{"name": "NodePreferAvoidPodsPriority", "weight": 2},
{"name": "NodeAffinityPriority", "weight": 2},
{"name": "TaintTolerationPriority", "weight": 2},
{"name": "InterPodAffinityPriority", "weight": 2},
{"name": "MostRequestedPriority", "weight": 2}
]
}`,
ExpectedPolicy: schedulerapi.Policy{
Predicates: []schedulerapi.PredicatePolicy{
{Name: "MatchNodeSelector"},
{Name: "PodFitsResources"},
{Name: "PodFitsHostPorts"},
{Name: "HostName"},
{Name: "NoDiskConflict"},
{Name: "NoVolumeZoneConflict"},
{Name: "PodToleratesNodeTaints"},
{Name: "CheckNodeMemoryPressure"},
{Name: "CheckNodeDiskPressure"},
{Name: "CheckNodeCondition"},
{Name: "MaxEBSVolumeCount"},
{Name: "MaxGCEPDVolumeCount"},
{Name: "MaxAzureDiskVolumeCount"},
{Name: "MatchInterPodAffinity"},
{Name: "GeneralPredicates"},
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
{Name: "TestLabelsPresence", Argument: &schedulerapi.PredicateArgument{LabelsPresence: &schedulerapi.LabelsPresence{Labels: []string{"foo"}, Presence: true}}},
{Name: "NoVolumeNodeConflict"},
},
Priorities: []schedulerapi.PriorityPolicy{
{Name: "EqualPriority", Weight: 2},
{Name: "ImageLocalityPriority", Weight: 2},
{Name: "LeastRequestedPriority", Weight: 2},
{Name: "BalancedResourceAllocation", Weight: 2},
{Name: "SelectorSpreadPriority", Weight: 2},
{Name: "NodePreferAvoidPodsPriority", Weight: 2},
{Name: "NodeAffinityPriority", Weight: 2},
{Name: "TaintTolerationPriority", Weight: 2},
{Name: "InterPodAffinityPriority", Weight: 2},
{Name: "MostRequestedPriority", Weight: 2},
},
},
},
}

registeredPredicates := sets.NewString(factory.ListRegisteredFitPredicates()...)
4 changes: 4 additions & 0 deletions plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go
@@ -175,6 +175,10 @@ func defaultPredicates() sets.String {
// Fit is determined by node disk pressure condition.
factory.RegisterFitPredicate("CheckNodeDiskPressure", predicates.CheckNodeDiskPressurePredicate),

// Fit is determined by node conditions: not ready, network unavailable, and out of disk.
factory.RegisterFitPredicate("CheckNodeCondition", predicates.CheckNodeConditionPredicate),
factory.RegisterMandatoryFitPredicate("CheckNodeCondition", predicates.CheckNodeConditionPredicate),

// Fit is determined by volume zone requirements.
factory.RegisterFitPredicateFactory(
"NoVolumeNodeConflict",
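Note the double registration above: the plain RegisterFitPredicate call keeps CheckNodeCondition resolvable by name when a policy lists it explicitly, while RegisterMandatoryFitPredicate guarantees it is injected even when a policy omits it; a sketch of that merge behavior follows the plugins.go diff below.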
45 changes: 8 additions & 37 deletions plugin/pkg/scheduler/factory/factory.go
@@ -705,7 +705,7 @@ func (f *ConfigFactory) CreateFromKeys(predicateKeys, priorityKeys sets.String,
SchedulerCache: f.schedulerCache,
Ecache: f.equivalencePodCache,
// The scheduler only needs to consider schedulable nodes.
NodeLister: &nodePredicateLister{f.nodeLister},
NodeLister: &nodeLister{f.nodeLister},
Algorithm: algo,
Binder: f.getBinder(extenders),
PodConditionUpdater: &podConditionUpdater{f.client},
@@ -720,12 +720,12 @@ func (f *ConfigFactory) CreateFromKeys(predicateKeys, priorityKeys sets.String,
}, nil
}

type nodePredicateLister struct {
type nodeLister struct {
corelisters.NodeLister
}

func (n *nodePredicateLister) List() ([]*v1.Node, error) {
return n.ListWithPredicate(getNodeConditionPredicate())
func (n *nodeLister) List() ([]*v1.Node, error) {
return n.NodeLister.List(labels.Everything())
}

func (f *ConfigFactory) GetPriorityFunctionConfigs(priorityKeys sets.String) ([]algorithm.PriorityConfig, error) {
@@ -770,11 +770,10 @@ func (f *ConfigFactory) getPluginArgs() (*PluginFactoryArgs, error) {
ControllerLister: f.controllerLister,
ReplicaSetLister: f.replicaSetLister,
StatefulSetLister: f.statefulSetLister,
// All fit predicates only need to consider schedulable nodes.
NodeLister: &nodePredicateLister{f.nodeLister},
NodeInfo: &predicates.CachedNodeInfo{NodeLister: f.nodeLister},
PVInfo: &predicates.CachedPersistentVolumeInfo{PersistentVolumeLister: f.pVLister},
PVCInfo: &predicates.CachedPersistentVolumeClaimInfo{PersistentVolumeClaimLister: f.pVCLister},
NodeLister: &nodeLister{f.nodeLister},
NodeInfo: &predicates.CachedNodeInfo{NodeLister: f.nodeLister},
PVInfo: &predicates.CachedPersistentVolumeInfo{PersistentVolumeLister: f.pVLister},
PVCInfo: &predicates.CachedPersistentVolumeClaimInfo{PersistentVolumeClaimLister: f.pVCLister},
HardPodAffinitySymmetricWeight: f.hardPodAffinitySymmetricWeight,
}, nil
}
@@ -793,34 +792,6 @@ func (f *ConfigFactory) ResponsibleForPod(pod *v1.Pod) bool {
return f.schedulerName == pod.Spec.SchedulerName
}

func getNodeConditionPredicate() corelisters.NodeConditionPredicate {
return func(node *v1.Node) bool {
for i := range node.Status.Conditions {
cond := &node.Status.Conditions[i]
// We consider the node for scheduling only when its:
// - NodeReady condition status is ConditionTrue,
// - NodeOutOfDisk condition status is ConditionFalse,
// - NodeNetworkUnavailable condition status is ConditionFalse.
if cond.Type == v1.NodeReady && cond.Status != v1.ConditionTrue {
glog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
return false
} else if cond.Type == v1.NodeOutOfDisk && cond.Status != v1.ConditionFalse {
glog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
return false
} else if cond.Type == v1.NodeNetworkUnavailable && cond.Status != v1.ConditionFalse {
glog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
return false
}
}
// Ignore nodes that are marked unschedulable
if node.Spec.Unschedulable {
glog.V(4).Infof("Ignoring node %v since it is unschedulable", node.Name)
return false
}
return true
}
}

// unassignedNonTerminatedPod selects pods that are unassigned and non-terminal.
func unassignedNonTerminatedPod(pod *v1.Pod) bool {
if len(pod.Spec.NodeName) != 0 {
43 changes: 0 additions & 43 deletions plugin/pkg/scheduler/factory/factory_test.go
@@ -525,46 +525,3 @@ func TestInvalidFactoryArgs(t *testing.T) {
}

}

func TestNodeConditionPredicate(t *testing.T) {
nodeFunc := getNodeConditionPredicate()
nodeList := &v1.NodeList{
Items: []v1.Node{
// node1 considered
{ObjectMeta: metav1.ObjectMeta{Name: "node1"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue}}}},
// node2 ignored - node not Ready
{ObjectMeta: metav1.ObjectMeta{Name: "node2"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}}}},
// node3 ignored - node out of disk
{ObjectMeta: metav1.ObjectMeta{Name: "node3"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}}},
// node4 considered
{ObjectMeta: metav1.ObjectMeta{Name: "node4"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeOutOfDisk, Status: v1.ConditionFalse}}}},

// node5 ignored - node out of disk
{ObjectMeta: metav1.ObjectMeta{Name: "node5"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}}},
// node6 considered
{ObjectMeta: metav1.ObjectMeta{Name: "node6"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionFalse}}}},
// node7 ignored - node out of disk, node not Ready
{ObjectMeta: metav1.ObjectMeta{Name: "node7"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}}},
// node8 ignored - node not Ready
{ObjectMeta: metav1.ObjectMeta{Name: "node8"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionFalse}}}},

// node9 ignored - node unschedulable
{ObjectMeta: metav1.ObjectMeta{Name: "node9"}, Spec: v1.NodeSpec{Unschedulable: true}},
// node10 considered
{ObjectMeta: metav1.ObjectMeta{Name: "node10"}, Spec: v1.NodeSpec{Unschedulable: false}},
// node11 considered
{ObjectMeta: metav1.ObjectMeta{Name: "node11"}},
},
}

nodeNames := []string{}
for _, node := range nodeList.Items {
if nodeFunc(&node) {
nodeNames = append(nodeNames, node.Name)
}
}
expectedNodes := []string{"node1", "node4", "node6", "node10", "node11"}
if !reflect.DeepEqual(expectedNodes, nodeNames) {
t.Errorf("expected: %v, got %v", expectedNodes, nodeNames)
}
}
26 changes: 23 additions & 3 deletions plugin/pkg/scheduler/factory/plugins.go
@@ -72,9 +72,10 @@ var (
schedulerFactoryMutex sync.Mutex

// maps that hold registered algorithm types
fitPredicateMap = make(map[string]FitPredicateFactory)
priorityFunctionMap = make(map[string]PriorityConfigFactory)
algorithmProviderMap = make(map[string]AlgorithmProviderConfig)
fitPredicateMap = make(map[string]FitPredicateFactory)
mandatoryFitPredicateMap = make(map[string]FitPredicateFactory)
priorityFunctionMap = make(map[string]PriorityConfigFactory)
algorithmProviderMap = make(map[string]AlgorithmProviderConfig)

// Registered metadata producers
priorityMetadataProducer MetadataProducerFactory
@@ -99,6 +100,17 @@ func RegisterFitPredicate(name string, predicate algorithm.FitPredicate) string
return RegisterFitPredicateFactory(name, func(PluginFactoryArgs) algorithm.FitPredicate { return predicate })
}

// RegisterMandatoryFitPredicate registers a fit predicate with the algorithm registry. The
// predicate is used by the kubelet and by DaemonSets, so it is always included in the
// configuration. Returns the name with which the predicate was registered.
func RegisterMandatoryFitPredicate(name string, predicate algorithm.FitPredicate) string {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
validateAlgorithmNameOrDie(name)
mandatoryFitPredicateMap[name] = func(PluginFactoryArgs) algorithm.FitPredicate { return predicate }
return name
}

// RegisterFitPredicateFactory registers a fit predicate factory with the
// algorithm registry. Returns the name with which the predicate was registered.
func RegisterFitPredicateFactory(name string, predicateFactory FitPredicateFactory) string {
@@ -308,6 +320,14 @@ func getFitPredicateFunctions(names sets.String, args PluginFactoryArgs) (map[st
}
predicates[name] = factory(args)
}

// Always include required fit predicates.
for name, factory := range mandatoryFitPredicateMap {
if _, found := predicates[name]; !found {
predicates[name] = factory(args)
}
}

return predicates, nil
}

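A minimal self-contained sketch of how the mandatory map guarantees inclusion at lookup time. It is simplified from the registry logic above; the predicate type and function names here are illustrative, not the real factory API:

package main

import "fmt"

// predicate stands in for algorithm.FitPredicate in this sketch.
type predicate func() bool

var (
	fitPredicates          = map[string]predicate{}
	mandatoryFitPredicates = map[string]predicate{}
)

func register(name string, p predicate)          { fitPredicates[name] = p }
func registerMandatory(name string, p predicate) { mandatoryFitPredicates[name] = p }

// getPredicates returns the requested predicates plus every mandatory one,
// mirroring the merge loop added to getFitPredicateFunctions in this PR.
func getPredicates(names []string) map[string]predicate {
	out := map[string]predicate{}
	for _, n := range names {
		if p, ok := fitPredicates[n]; ok {
			out[n] = p
		}
	}
	for n, p := range mandatoryFitPredicates {
		if _, found := out[n]; !found {
			out[n] = p
		}
	}
	return out
}

func main() {
	register("PodFitsResources", func() bool { return true })
	register("CheckNodeCondition", func() bool { return true })
	registerMandatory("CheckNodeCondition", func() bool { return true })

	// The caller asked only for PodFitsResources, but the mandatory
	// CheckNodeCondition predicate is merged in as well.
	got := getPredicates([]string{"PodFitsResources"})
	fmt.Println(len(got)) // 2
}

This matches the integration-test expectation in the last diff below: a policy naming two predicates ends up with three once the mandatory one is merged in.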
3 changes: 2 additions & 1 deletion test/integration/scheduler/scheduler_test.go
@@ -139,7 +139,8 @@ func TestSchedulerCreationFromConfigMap(t *testing.T) {
// Verify that the config is applied correctly.
schedPredicates := sched.Config().Algorithm.Predicates()
schedPrioritizers := sched.Config().Algorithm.Prioritizers()
if len(schedPredicates) != 2 || len(schedPrioritizers) != 2 {
// Includes one mandatory predicate.
if len(schedPredicates) != 3 || len(schedPrioritizers) != 2 {
t.Errorf("Unexpected number of predicates or priority functions. Number of predicates: %v, number of prioritizers: %v", len(schedPredicates), len(schedPrioritizers))
}
// Check a predicate and a priority function.