Use Allocatable to replace Capacity #19083

Merged · 2 commits · Jan 22, 2016
15 changes: 8 additions & 7 deletions plugin/pkg/scheduler/algorithm/predicates/predicates.go
@@ -266,9 +266,9 @@ func getResourceRequest(pod *api.Pod) resourceRequest {
return result
}

func CheckPodsExceedingFreeResources(pods []*api.Pod, capacity api.ResourceList) (fitting []*api.Pod, notFittingCPU, notFittingMemory []*api.Pod) {
totalMilliCPU := capacity.Cpu().MilliValue()
totalMemory := capacity.Memory().Value()
func CheckPodsExceedingFreeResources(pods []*api.Pod, allocatable api.ResourceList) (fitting []*api.Pod, notFittingCPU, notFittingMemory []*api.Pod) {
totalMilliCPU := allocatable.Cpu().MilliValue()
totalMemory := allocatable.Memory().Value()
milliCPURequested := int64(0)
memoryRequested := int64(0)
for _, pod := range pods {
@@ -304,8 +304,9 @@ func (r *ResourceFit) PodFitsResources(pod *api.Pod, existingPods []*api.Pod, no
return false, err
}

if int64(len(existingPods))+1 > info.Status.Capacity.Pods().Value() {
glog.V(10).Infof("Cannot schedule Pod %+v, because Node %+v is full, running %v out of %v Pods.", podName(pod), node, len(existingPods), info.Status.Capacity.Pods().Value())
allocatable := info.Status.Allocatable
if int64(len(existingPods))+1 > allocatable.Pods().Value() {
glog.V(10).Infof("Cannot schedule Pod %+v, because Node %+v is full, running %v out of %v Pods.", podName(pod), node, len(existingPods), allocatable.Pods().Value())
return false, ErrExceededMaxPodNumber
}

@@ -315,7 +316,7 @@ func (r *ResourceFit) PodFitsResources(pod *api.Pod, existingPods []*api.Pod, no
}

pods := append(existingPods, pod)
_, exceedingCPU, exceedingMemory := CheckPodsExceedingFreeResources(pods, info.Status.Capacity)
_, exceedingCPU, exceedingMemory := CheckPodsExceedingFreeResources(pods, allocatable)
if len(exceedingCPU) > 0 {
glog.V(10).Infof("Cannot schedule Pod %+v, because Node %v does not have sufficient CPU", podName(pod), node)
return false, ErrInsufficientFreeCPU
@@ -324,7 +325,7 @@ func (r *ResourceFit) PodFitsResources(pod *api.Pod, existingPods []*api.Pod, no
glog.V(10).Infof("Cannot schedule Pod %+v, because Node %v does not have sufficient Memory", podName(pod), node)
return false, ErrInsufficientFreeMemory
}
glog.V(10).Infof("Schedule Pod %+v on Node %+v is allowed, Node is running only %v out of %v Pods.", podName(pod), node, len(pods)-1, info.Status.Capacity.Pods().Value())
glog.V(10).Infof("Schedule Pod %+v on Node %+v is allowed, Node is running only %v out of %v Pods.", podName(pod), node, len(pods)-1, allocatable.Pods().Value())
return true, nil
}

12 changes: 10 additions & 2 deletions plugin/pkg/scheduler/algorithm/predicates/predicates_test.go
@@ -54,6 +54,14 @@ func makeResources(milliCPU int64, memory int64, pods int64) api.NodeResources {
}
}

func makeAllocatableResources(milliCPU int64, memory int64, pods int64) api.ResourceList {
return api.ResourceList{
api.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
api.ResourcePods: *resource.NewQuantity(pods, resource.DecimalSI),
}
}

func newResourcePod(usage ...resourceRequest) *api.Pod {
containers := []api.Container{}
for _, req := range usage {
@@ -130,7 +138,7 @@ func TestPodFitsResources(t *testing.T) {
}

for _, test := range enoughPodsTests {
node := api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 32).Capacity}}
node := api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 32)}}
Review comment (Member):
Aren't you essentially changing the node-status updates here? Why would you break out Allocatable separately from Capacity? IMHO we should create structure under Capacity. I must admit I didn't fully review this until now. /cc @davidopp

Is capacity used in other areas differently?

Is this to allow for backwards compatibility?

Reply (Member):
@timothysc are you asking about the test or about the feature? API is here
https://github.com/kubernetes/kubernetes/pull/18189/files

IIUC the scheduler only needs to work with allocatable, because we are guaranteed either to be running Kubelets that publish allocatable, or to have the API server default allocatable from capacity when the Kubelet does not publish it.
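
For illustration, a minimal sketch of the defaulting described above. defaultAllocatable is a hypothetical helper, not the actual api-server code; it only assumes that api.NodeStatus carries Capacity and Allocatable ResourceLists.

// Hypothetical sketch of the defaulting described above: if a Kubelet does
// not publish Allocatable, fall back to Capacity so the scheduler can always
// rely on Status.Allocatable being populated.
func defaultAllocatable(status *api.NodeStatus) {
	if status.Allocatable == nil {
		status.Allocatable = status.Capacity
	}
}

With that defaulting in place, the scheduler can read Status.Allocatable unconditionally, which is what this PR switches the predicates and priorities code to do.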


fit := ResourceFit{FakeNodeInfo(node)}
fits, err := fit.PodFitsResources(test.pod, test.existingPods, "machine")
@@ -178,7 +186,7 @@ func TestPodFitsResources(t *testing.T) {
},
}
for _, test := range notEnoughPodsTests {
node := api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 1).Capacity}}
node := api.Node{Status: api.NodeStatus{Capacity: api.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 1)}}

fit := ResourceFit{FakeNodeInfo(node)}
fits, err := fit.PodFitsResources(test.pod, test.existingPods, "machine")
8 changes: 4 additions & 4 deletions plugin/pkg/scheduler/algorithm/priorities/priorities.go
@@ -76,8 +76,8 @@ func getNonzeroRequests(requests *api.ResourceList) (int64, int64) {
func calculateResourceOccupancy(pod *api.Pod, node api.Node, pods []*api.Pod) schedulerapi.HostPriority {
totalMilliCPU := int64(0)
totalMemory := int64(0)
capacityMilliCPU := node.Status.Capacity.Cpu().MilliValue()
capacityMemory := node.Status.Capacity.Memory().Value()
capacityMilliCPU := node.Status.Allocatable.Cpu().MilliValue()
capacityMemory := node.Status.Allocatable.Memory().Value()

for _, existingPod := range pods {
for _, container := range existingPod.Spec.Containers {
@@ -208,8 +208,8 @@ func calculateBalancedResourceAllocation(pod *api.Pod, node api.Node, pods []*ap
totalMemory += memory
}

capacityMilliCPU := node.Status.Capacity.Cpu().MilliValue()
capacityMemory := node.Status.Capacity.Memory().Value()
capacityMilliCPU := node.Status.Allocatable.Cpu().MilliValue()
capacityMemory := node.Status.Allocatable.Memory().Value()

cpuFraction := fractionOfCapacity(totalMilliCPU, capacityMilliCPU)
memoryFraction := fractionOfCapacity(totalMemory, capacityMemory)
4 changes: 4 additions & 0 deletions plugin/pkg/scheduler/algorithm/priorities/priorities_test.go
@@ -38,6 +38,10 @@ func makeNode(node string, milliCPU, memory int64) api.Node {
"cpu": *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
"memory": *resource.NewQuantity(memory, resource.BinarySI),
},
Allocatable: api.ResourceList{
"cpu": *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
"memory": *resource.NewQuantity(memory, resource.BinarySI),
},
},
}
}
116 changes: 116 additions & 0 deletions test/integration/scheduler_test.go
@@ -449,3 +449,119 @@ func createPod(name string, annotation map[string]string) *api.Pod {
},
}
}

// This test verifies that the scheduler works correctly regardless of whether the kubelet is allocatable-aware or not.
func TestAllocatable(t *testing.T) {
framework.DeleteAllEtcdKeys()

var m *master.Master
s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
m.Handler.ServeHTTP(w, req)
}))
defer s.Close()

masterConfig := framework.NewIntegrationTestMasterConfig()
m = master.New(masterConfig)

// 1. create and start default-scheduler
restClient := client.NewOrDie(&client.Config{Host: s.URL, GroupVersion: testapi.Default.GroupVersion()})

schedulerConfigFactory := factory.NewConfigFactory(restClient, api.DefaultSchedulerName)
schedulerConfig, err := schedulerConfigFactory.Create()
if err != nil {
t.Fatalf("Couldn't create scheduler config: %v", err)
}
eventBroadcaster := record.NewBroadcaster()
schedulerConfig.Recorder = eventBroadcaster.NewRecorder(api.EventSource{Component: api.DefaultSchedulerName})
eventBroadcaster.StartRecordingToSink(restClient.Events(""))
scheduler.New(schedulerConfig).Run()
// default-scheduler will be stopped later
defer close(schedulerConfig.StopEverything)

// 2. create a node without allocatable awareness
node := &api.Node{
ObjectMeta: api.ObjectMeta{Name: "node-allocatable-scheduler-test-node"},
Spec: api.NodeSpec{Unschedulable: false},
Status: api.NodeStatus{
Capacity: api.ResourceList{
api.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
api.ResourceCPU: *resource.NewMilliQuantity(30, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(30, resource.BinarySI),
},
},
}

allocNode, err := restClient.Nodes().Create(node)
if err != nil {
t.Fatalf("Failed to create node: %v", err)
}

// 3. create a resource pod which requests less than Capacity
podResource := &api.Pod{
ObjectMeta: api.ObjectMeta{Name: "pod-test-allocatable"},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "container",
Image: "kubernetes/pause:go",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceCPU: *resource.NewMilliQuantity(20, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(20, resource.BinarySI),
},
},
},
},
},
}

testAllocPod, err := restClient.Pods(api.NamespaceDefault).Create(podResource)
if err != nil {
t.Fatalf("Test allocatable unawareness failed to create pod: %v", err)
}

// 4. Test: this test pod should be scheduled since the api-server will use Capacity as Allocatable
err = wait.Poll(time.Second, time.Second*5, podScheduled(restClient, testAllocPod.Namespace, testAllocPod.Name))
if err != nil {
t.Errorf("Test allocatable unawareness: %s Pod not scheduled: %v", testAllocPod.Name, err)
} else {
t.Logf("Test allocatable unawareness: %s Pod scheduled", testAllocPod.Name)
}

// 5. Change the node status to be allocatable-aware; note that Allocatable is less than the Pod's request
allocNode.Status = api.NodeStatus{
Capacity: api.ResourceList{
api.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
api.ResourceCPU: *resource.NewMilliQuantity(30, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(30, resource.BinarySI),
},
Allocatable: api.ResourceList{
api.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
api.ResourceCPU: *resource.NewMilliQuantity(10, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(10, resource.BinarySI),
},
}

if _, err := restClient.Nodes().UpdateStatus(allocNode); err != nil {
t.Fatalf("Failed to update node with Status.Allocatable: %v", err)
}

if err := restClient.Pods(api.NamespaceDefault).Delete(podResource.Name, &api.DeleteOptions{}); err != nil {
t.Fatalf("Failed to remove first resource pod: %v", err)
}

// 6. Make another pod with different name, same resource request
podResource.ObjectMeta.Name = "pod-test-allocatable2"
testAllocPod2, err := restClient.Pods(api.NamespaceDefault).Create(podResource)
if err != nil {
t.Fatalf("Test allocatable awareness failed to create pod: %v", err)
}

// 7. Test: this test pod should not be scheduled since it requests more than Allocatable
err = wait.Poll(time.Second, time.Second*5, podScheduled(restClient, testAllocPod2.Namespace, testAllocPod2.Name))
if err == nil {
t.Errorf("Test allocatable awareness: %s Pod got scheduled unexpectly, %v", testAllocPod2.Name, err)
} else {
t.Logf("Test allocatable awareness: %s Pod not scheduled as expected", testAllocPod2.Name)
}
}