Skip to content

Commit

Permalink
UPSTREAM: <carry>: openshift: Implement scale from zero
Browse files Browse the repository at this point in the history
This change removes the older annotations in favor of the names used in
the specific providers. It also adds an annotation for max pods and
makes the GPU field optional. The new schema for these resources should
follow this pattern:

```yaml
apiVersion: v1
items:
- apiVersion: machine.openshift.io/v1beta1
  kind: MachineSet
  metadata:
    annotations:
      machine.openshift.io/cluster-api-autoscaler-node-group-min-size: "0"
      machine.openshift.io/cluster-api-autoscaler-node-group-max-size: "6"
      machine.openshift.io/vCPU: "2"
      machine.openshift.io/memoryMb: 8G
      machine.openshift.io/GPU: "1"
      machine.openshift.io/maxPods: "100"
```
  • Loading branch information
elmiko committed Mar 18, 2020
1 parent c08136a commit 9c4af16
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 82 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,10 @@ func (r machineDeploymentScalableResource) InstanceMemoryCapacity() (resource.Qu
return parseMemoryCapacity(r.machineDeployment.Annotations)
}

func (r machineDeploymentScalableResource) InstancePodCapacity() (resource.Quantity, error) {
return parsePodCapacity(r.machineDeployment.Annotations)
}

func (r machineDeploymentScalableResource) InstanceGPUCapacity() (resource.Quantity, error) {
return parseGPUCapacity(r.machineDeployment.Annotations)
}

func (r machineDeploymentScalableResource) InstanceMaxPodsCapacity() (resource.Quantity, error) {
return parseMaxPodsCapacity(r.machineDeployment.Annotations)
}
Original file line number Diff line number Diff line change
Expand Up @@ -144,10 +144,10 @@ func (r machineSetScalableResource) InstanceMemoryCapacity() (resource.Quantity,
return parseMemoryCapacity(r.machineSet.Annotations)
}

func (r machineSetScalableResource) InstancePodCapacity() (resource.Quantity, error) {
return parsePodCapacity(r.machineSet.Annotations)
}

func (r machineSetScalableResource) InstanceGPUCapacity() (resource.Quantity, error) {
return parseGPUCapacity(r.machineSet.Annotations)
}

func (r machineSetScalableResource) InstanceMaxPodsCapacity() (resource.Quantity, error) {
return parseMaxPodsCapacity(r.machineSet.Annotations)
}
49 changes: 24 additions & 25 deletions cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
gpuapis "k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
Expand All @@ -32,6 +33,10 @@ const (
machineDeleteAnnotationKey = "machine.openshift.io/cluster-api-delete-machine"
machineAnnotationKey = "machine.openshift.io/machine"
debugFormat = "%s (min: %d, max: %d, replicas: %d)"

// This default for the maximum number of pods comes from the machine-config-operator
// see https://github.com/openshift/machine-config-operator/blob/2f1bd6d99131fa4471ed95543a51dec3d5922b2b/templates/worker/01-worker-kubelet/_base/files/kubelet.yaml#L19
defaultMaxPods = 250
)

type nodegroup struct {
Expand Down Expand Up @@ -233,7 +238,12 @@ func (ng *nodegroup) TemplateNodeInfo() (*schedulernodeinfo.NodeInfo, error) {
return nil, err
}

pod, err := ng.scalableResource.InstancePodCapacity()
gpu, err := ng.scalableResource.InstanceGPUCapacity()
if err != nil {
return nil, err
}

pod, err := ng.scalableResource.InstanceMaxPodsCapacity()
if err != nil {
return nil, err
}
Expand All @@ -242,30 +252,34 @@ func (ng *nodegroup) TemplateNodeInfo() (*schedulernodeinfo.NodeInfo, error) {
return nil, cloudprovider.ErrNotImplemented
}

if gpu.IsZero() {
gpu = zeroQuantity.DeepCopy()
}

if pod.IsZero() {
pod = *resource.NewQuantity(110, resource.DecimalSI)
pod = *resource.NewQuantity(defaultMaxPods, resource.DecimalSI)
}

capacity := map[corev1.ResourceName]resource.Quantity{
corev1.ResourceCPU: cpu,
corev1.ResourceMemory: mem,
corev1.ResourcePods: pod,
corev1.ResourceCPU: cpu,
corev1.ResourceMemory: mem,
corev1.ResourcePods: pod,
gpuapis.ResourceNvidiaGPU: gpu,
}

nodeName := fmt.Sprintf("%s-asg-%d", ng.Name(), rand.Int63())
node := corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: nodeName,
SelfLink: fmt.Sprintf("/api/v1/nodes/%s", nodeName),
Labels: map[string]string{},
Name: nodeName,
Labels: map[string]string{},
},
}

node.Status.Capacity = capacity
node.Status.Allocatable = capacity
node.Status.Conditions = cloudprovider.BuildReadyConditions()
node.Spec.Taints = ng.scalableResource.Taints()
node.Labels = joinStringMaps(ng.scalableResource.Labels(), buildGenericLabels(nodeName))
node.Labels = cloudprovider.JoinStringMaps(ng.scalableResource.Labels(), buildGenericLabels(nodeName))

nodeInfo := schedulernodeinfo.NewNodeInfo(cloudprovider.BuildKubeProxy(ng.Name()))
nodeInfo.SetNode(&node)
Expand Down Expand Up @@ -322,25 +336,10 @@ func newNodegroupFromMachineDeployment(controller *machineController, machineDep
}, nil
}

func joinStringMaps(labels ...map[string]string) map[string]string {
var m map[string]string

for i := range labels {
switch i {
case 0:
m = labels[i]
default:
m = cloudprovider.JoinStringMaps(m, labels[i])
}
}

return m
}

func buildGenericLabels(nodeName string) map[string]string {
m := make(map[string]string)
m[kubeletapis.LabelArch] = cloudprovider.DefaultArch
m[kubeletapis.LabelOS] = cloudprovider.DefaultOS
//m[kubeletapis.Hostname] = nodeName
m[corev1.LabelHostname] = nodeName
return m
}
Original file line number Diff line number Diff line change
Expand Up @@ -168,12 +168,6 @@ func TestNodeGroupNewNodeGroupConstructor(t *testing.T) {
t.Errorf("expected %q, got %q", expectedDebug, ng.Debug())
}

if ng.scalableResource.CanScaleFromZero() {
if _, err := ng.TemplateNodeInfo(); err != cloudprovider.ErrNotImplemented {
t.Error("expected error")
}
}

if exists := ng.Exist(); !exists {
t.Errorf("expected %t, got %t", true, exists)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,6 @@ type scalableResource interface {
CanScaleFromZero() bool
InstanceCPUCapacity() (resource.Quantity, error)
InstanceMemoryCapacity() (resource.Quantity, error)
InstancePodCapacity() (resource.Quantity, error)
InstanceGPUCapacity() (resource.Quantity, error)
InstanceMaxPodsCapacity() (resource.Quantity, error)
}
43 changes: 20 additions & 23 deletions cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,10 @@ const (
nodeGroupMinSizeAnnotationKey = "machine.openshift.io/cluster-api-autoscaler-node-group-min-size"
nodeGroupMaxSizeAnnotationKey = "machine.openshift.io/cluster-api-autoscaler-node-group-max-size"

nodeGroupInstanceCPUCapacity = "machine.openshift.io/cluster-api-instance-cpu-capacity"
nodeGroupInstanceGPUCapacity = "machine.openshift.io/cluster-api-instance-gpu-capacity"
nodeGroupInstanceMemoryCapacity = "machine.openshift.io/cluster-api-instance-memory-capacity"
nodeGroupInstancePodCapacity = "machine.openshift.io/cluster-api-instance-pod-capacity"
nodeGroupScaleFromZero = "machine.openshift.io/cluster-api-scale-from-zero"
cpuKey = "machine.openshift.io/vCPU"
memoryKey = "machine.openshift.io/memoryMb"
gpuKey = "machine.openshift.io/GPU"
maxPodsKey = "machine.openshift.io/maxPods"
)

var (
Expand Down Expand Up @@ -164,36 +163,34 @@ func normalizedProviderString(s string) normalizedProviderID {
}

func scaleFromZeroEnabled(annotations map[string]string) bool {
if val, exists := annotations[nodeGroupScaleFromZero]; exists {
return val == "true"
cpu := annotations[cpuKey]
mem := annotations[memoryKey]

if cpu != "" && mem != "" {
return true
}
return false
}

func parseCPUCapacity(annotations map[string]string) (resource.Quantity, error) {
if val, exists := annotations[nodeGroupInstanceCPUCapacity]; exists && val != "" {
func parseKey(annotations map[string]string, key string) (resource.Quantity, error) {
if val, exists := annotations[key]; exists && val != "" {
return resource.ParseQuantity(val)
}
return zeroQuantity.DeepCopy(), nil
}

func parseMemoryCapacity(annotations map[string]string) (resource.Quantity, error) {
if val, exists := annotations[nodeGroupInstanceMemoryCapacity]; exists && val != "" {
return resource.ParseQuantity(val)
}
return zeroQuantity.DeepCopy(), nil
func parseCPUCapacity(annotations map[string]string) (resource.Quantity, error) {
return parseKey(annotations, cpuKey)
}

func parsePodCapacity(annotations map[string]string) (resource.Quantity, error) {
if val, exists := annotations[nodeGroupInstancePodCapacity]; exists && val != "" {
return resource.ParseQuantity(val)
}
return zeroQuantity.DeepCopy(), nil
func parseMemoryCapacity(annotations map[string]string) (resource.Quantity, error) {
return parseKey(annotations, memoryKey)
}

func parseGPUCapacity(annotations map[string]string) (resource.Quantity, error) {
if val, exists := annotations[nodeGroupInstanceGPUCapacity]; exists && val != "" {
return resource.ParseQuantity(val)
}
return zeroQuantity.DeepCopy(), nil
return parseKey(annotations, gpuKey)
}

func parseMaxPodsCapacity(annotations map[string]string) (resource.Quantity, error) {
return parseKey(annotations, maxPodsKey)
}
Original file line number Diff line number Diff line change
Expand Up @@ -419,17 +419,19 @@ func TestScaleFromZeroEnabled(t *testing.T) {
annotations: map[string]string{"foo": "bar"},
enabled: false,
}, {
description: "matching key, value=!true annotation",
description: "matching key, incomplete annotations",
annotations: map[string]string{
"foo": "bar",
nodeGroupScaleFromZero: "false",
"foo": "bar",
cpuKey: "1",
gpuKey: "2",
},
enabled: false,
}, {
description: "matching key, value=true annotation",
description: "matching key, complete annotations",
annotations: map[string]string{
"foo": "bar",
nodeGroupScaleFromZero: "true",
"foo": "bar",
cpuKey: "1",
memoryKey: "2",
},
enabled: true,
}} {
Expand Down Expand Up @@ -459,12 +461,12 @@ func TestParseCPUCapacity(t *testing.T) {
expectedError: false,
}, {
description: "bad quantity",
annotations: map[string]string{nodeGroupInstanceCPUCapacity: "not-a-quantity"},
annotations: map[string]string{cpuKey: "not-a-quantity"},
expectedQuantity: zeroQuantity.DeepCopy(),
expectedError: true,
}, {
description: "valid quantity",
annotations: map[string]string{nodeGroupInstanceCPUCapacity: "123m"},
annotations: map[string]string{cpuKey: "123m"},
expectedError: false,
expectedQuantity: resource.MustParse("123m"),
}} {
Expand Down Expand Up @@ -497,12 +499,12 @@ func TestParseMemoryCapacity(t *testing.T) {
expectedError: false,
}, {
description: "bad quantity",
annotations: map[string]string{nodeGroupInstanceMemoryCapacity: "not-a-quantity"},
annotations: map[string]string{memoryKey: "not-a-quantity"},
expectedQuantity: zeroQuantity.DeepCopy(),
expectedError: true,
}, {
description: "valid quantity",
annotations: map[string]string{nodeGroupInstanceMemoryCapacity: "456Mi"},
annotations: map[string]string{memoryKey: "456Mi"},
expectedError: false,
expectedQuantity: resource.MustParse("456Mi"),
}} {
Expand All @@ -518,7 +520,7 @@ func TestParseMemoryCapacity(t *testing.T) {
}
}

func TestParsePodCapacity(t *testing.T) {
func TestParseGPUCapacity(t *testing.T) {
for _, tc := range []struct {
description string
annotations map[string]string
Expand All @@ -535,17 +537,17 @@ func TestParsePodCapacity(t *testing.T) {
expectedError: false,
}, {
description: "bad quantity",
annotations: map[string]string{nodeGroupInstancePodCapacity: "not-a-quantity"},
annotations: map[string]string{gpuKey: "not-a-quantity"},
expectedQuantity: zeroQuantity.DeepCopy(),
expectedError: true,
}, {
description: "valid quantity",
annotations: map[string]string{nodeGroupInstancePodCapacity: "42"},
annotations: map[string]string{gpuKey: "13"},
expectedError: false,
expectedQuantity: resource.MustParse("42"),
expectedQuantity: resource.MustParse("13"),
}} {
t.Run(tc.description, func(t *testing.T) {
got, err := parsePodCapacity(tc.annotations)
got, err := parseGPUCapacity(tc.annotations)
if tc.expectedError && err == nil {
t.Fatal("expected an error")
}
Expand All @@ -556,7 +558,7 @@ func TestParsePodCapacity(t *testing.T) {
}
}

func TestParseGPUCapacity(t *testing.T) {
func TestParseMaxPodsCapacity(t *testing.T) {
for _, tc := range []struct {
description string
annotations map[string]string
Expand All @@ -573,17 +575,17 @@ func TestParseGPUCapacity(t *testing.T) {
expectedError: false,
}, {
description: "bad quantity",
annotations: map[string]string{nodeGroupInstanceGPUCapacity: "not-a-quantity"},
annotations: map[string]string{maxPodsKey: "not-a-quantity"},
expectedQuantity: zeroQuantity.DeepCopy(),
expectedError: true,
}, {
description: "valid quantity",
annotations: map[string]string{nodeGroupInstanceGPUCapacity: "13"},
annotations: map[string]string{maxPodsKey: "13"},
expectedError: false,
expectedQuantity: resource.MustParse("13"),
}} {
t.Run(tc.description, func(t *testing.T) {
got, err := parseGPUCapacity(tc.annotations)
got, err := parseMaxPodsCapacity(tc.annotations)
if tc.expectedError && err == nil {
t.Fatal("expected an error")
}
Expand Down

0 comments on commit 9c4af16

Please sign in to comment.