Bug 1861642: Add maxNodeProvisionTime for baremetal
In baremetal environments it can easily take longer than the CA
default of 15 minutes for a node to become active after a scale-out
action.

The CA supports --max-node-provision-time[1], so exposing that value
for configuration allows the timeout to be tuned to something better
suited to baremetal.

[1] https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#what-are-the-parameters-to-ca
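For illustration, a small Go sketch of the value format involved here, assuming the CA parses --max-node-provision-time as a standard Go duration string (the CRD pattern added below points the same way); the "30m" value is purely illustrative.

package main

import (
	"fmt"
	"time"
)

func main() {
	// The CA's built-in default mentioned above.
	defaultProvisionTime := 15 * time.Minute

	// A longer value an admin might set through the new
	// maxNodeProvisionTime field; "30m" is illustrative only.
	configured, err := time.ParseDuration("30m")
	if err != nil {
		panic(err)
	}

	fmt.Printf("default=%s configured=%s\n", defaultProvisionTime, configured)
}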
Steven Hardy committed Jul 31, 2020
1 parent 3790a11 commit f8a7872
Showing 5 changed files with 22 additions and 0 deletions.
6 changes: 6 additions & 0 deletions install/01_clusterautoscaler.crd.yaml
@@ -5,6 +5,8 @@ kind: CustomResourceDefinition
metadata:
annotations:
exclude.release.openshift.io/internal-openshift-hosted: "true"
annotations:
controller-gen.kubebuilder.io/version: v0.2.9-0.20200331153640-3c5446d407dd
creationTimestamp: null
name: clusterautoscalers.autoscaling.openshift.io
spec:
@@ -49,6 +51,10 @@ spec:
feature flag. Should CA ignore DaemonSet pods when calculating resource
utilization for scaling down. false by default
type: boolean
maxNodeProvisionTime:
description: Maximum time CA waits for node to be provisioned
pattern: ([0-9]*(\.[0-9]*)?[a-z]+)+
type: string
maxPodGracePeriod:
description: Gives pods graceful termination time before scaling down
format: int32
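As a quick check of what the new pattern admits, here is a sketch comparing the CRD regex (anchored here for a strict match, which the OpenAPI pattern itself is not) with Go's duration parsing, which is presumably how the value is consumed downstream; the sample values are illustrative.

package main

import (
	"fmt"
	"regexp"
	"time"
)

// durationPattern mirrors the maxNodeProvisionTime pattern from the CRD
// above, anchored here so partial matches don't slip through.
var durationPattern = regexp.MustCompile(`^([0-9]*(\.[0-9]*)?[a-z]+)+$`)

func main() {
	for _, v := range []string{"30m", "1.5h", "90s", "30"} {
		_, parseErr := time.ParseDuration(v)
		fmt.Printf("%-5s pattern=%-5v parses=%v\n",
			v, durationPattern.MatchString(v), parseErr == nil)
	}
}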
2 changes: 2 additions & 0 deletions install/02_machineautoscaler.crd.yaml
@@ -5,6 +5,8 @@ kind: CustomResourceDefinition
metadata:
annotations:
exclude.release.openshift.io/internal-openshift-hosted: "true"
annotations:
controller-gen.kubebuilder.io/version: v0.2.9-0.20200331153640-3c5446d407dd
creationTimestamp: null
name: machineautoscalers.autoscaling.openshift.io
spec:
4 changes: 4 additions & 0 deletions pkg/apis/autoscaling/v1/clusterautoscaler_types.go
@@ -19,6 +19,10 @@ type ClusterAutoscalerSpec struct {
// Gives pods graceful termination time before scaling down
MaxPodGracePeriod *int32 `json:"maxPodGracePeriod,omitempty"`

// Maximum time CA waits for node to be provisioned
// +kubebuilder:validation:Pattern=([0-9]*(\.[0-9]*)?[a-z]+)+
MaxNodeProvisionTime *string `json:"maxNodeProvisionTime,omitempty"`

// To allow users to schedule "best-effort" pods, which shouldn't trigger
// Cluster Autoscaler actions, but only run when there are spare resources available,
// More info: https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#how-does-cluster-autoscaler-work-with-pod-priority-and-preemption
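A hypothetical client-side usage sketch of the new field; the import path is assumed from this repository's pkg/apis/autoscaling/v1 layout and is not part of the diff.

package main

import (
	"fmt"

	// Assumed import path, inferred from pkg/apis/autoscaling/v1 in this repo.
	v1 "github.com/openshift/cluster-autoscaler-operator/pkg/apis/autoscaling/v1"
)

func main() {
	// Illustrative value only; pick something suited to your hardware.
	maxProvisionTime := "30m"

	ca := &v1.ClusterAutoscaler{}
	ca.Spec.MaxNodeProvisionTime = &maxProvisionTime

	fmt.Println(*ca.Spec.MaxNodeProvisionTime)
}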
6 changes: 6 additions & 0 deletions pkg/controller/clusterautoscaler/clusterautoscaler.go
@@ -44,6 +44,7 @@ const (
ScaleDownDelayAfterFailureArg AutoscalerArg = "--scale-down-delay-after-failure"
ScaleDownUnneededTimeArg AutoscalerArg = "--scale-down-unneeded-time"
MaxNodesTotalArg AutoscalerArg = "--max-nodes-total"
MaxNodeProvisionTimeArg AutoscalerArg = "--max-node-provision-time"
CoresTotalArg AutoscalerArg = "--cores-total"
MemoryTotalArg AutoscalerArg = "--memory-total"
GPUTotalArg AutoscalerArg = "--gpu-total"
@@ -71,6 +72,11 @@ func AutoscalerArgs(ca *v1.ClusterAutoscaler, cfg *Config) []string {
args = append(args, v)
}

if ca.Spec.MaxNodeProvisionTime != nil {
v := MaxNodeProvisionTimeArg.Value(*s.MaxNodeProvisionTime)
args = append(args, v)
}

if ca.Spec.PodPriorityThreshold != nil {
v := ExpendablePodsPriorityCutoffArg.Value(*s.PodPriorityThreshold)
args = append(args, v)
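The AutoscalerArg helpers are not part of this diff; a minimal sketch of how Value presumably builds the flag, matching the "--max-node-provision-time=30m" shape the test below expects.

package clusterautoscaler

import "fmt"

// AutoscalerArg sketch: the real type and helpers live elsewhere in this
// package and are not shown in the diff.
type AutoscalerArg string

func (a AutoscalerArg) String() string {
	return string(a)
}

// Value joins the flag name and its value with "=", e.g.
// "--max-node-provision-time=30m".
func (a AutoscalerArg) Value(value interface{}) string {
	return fmt.Sprintf("%s=%v", a.String(), value)
}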
4 changes: 4 additions & 0 deletions pkg/controller/clusterautoscaler/clusterautoscaler_test.go
@@ -34,6 +34,7 @@ const (
var (
ScaleDownUnneededTime = "10s"
ScaleDownDelayAfterAdd = "60s"
MaxNodeProvisionTime = "30m"
PodPriorityThreshold int32 = -10
MaxPodGracePeriod int32 = 60
MaxNodesTotal int32 = 100
@@ -146,6 +147,7 @@ func TestAutoscalerArgs(t *testing.T) {
expectedMissing := []string{
"--scale-down-delay-after-delete",
"--scale-down-delay-after-failure",
"--max-node-provision-time",
"--balance-similar-node-groups",
"--ignore-daemonsets-utilization",
"--skip-nodes-with-local-storage",
@@ -165,13 +167,15 @@ func TestAutoscalerArgEnabled(t *testing.T) {
ca.Spec.BalanceSimilarNodeGroups = pointer.BoolPtr(true)
ca.Spec.IgnoreDaemonsetsUtilization = pointer.BoolPtr(true)
ca.Spec.SkipNodesWithLocalStorage = pointer.BoolPtr(true)
ca.Spec.MaxNodeProvisionTime = &MaxNodeProvisionTime

args := AutoscalerArgs(ca, &Config{CloudProvider: TestCloudProvider, Namespace: TestNamespace})

expected := []string{
fmt.Sprintf("--balance-similar-node-groups"),
fmt.Sprintf("--ignore-daemonsets-utilization"),
fmt.Sprintf("--skip-nodes-with-local-storage"),
fmt.Sprintf("--max-node-provision-time=%s", MaxNodeProvisionTime),
}

for _, e := range expected {
