Bug 1861642: Add maxNodeProvisionTime for baremetal
In baremetal environments it can easily take longer than the CA
default of 15 minutes for a node to become active after a scale-out
action.

The CA supports --max-node-provision-time[1], so exposing that value
for configuration allows the provision timeout to be tuned to better
suit baremetal.

Accepted review suggestion to fix bug in regex
Co-authored-by: Joel Speed <Joel.speed@hotmail.co.uk>

[1] https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#what-are-the-parameters-to-ca
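
For context, a minimal Go sketch (illustrative only; the 90m value is hypothetical
and not taken from this commit) contrasting the CA's 15-minute default with a
longer, baremetal-friendly setting, both in the duration format the new field
accepts:

	package main

	import (
		"fmt"
		"time"
	)

	func main() {
		// Upstream CA default for --max-node-provision-time, per the commit message.
		defaultTimeout, _ := time.ParseDuration("15m")

		// A longer value a baremetal cluster might need; "90m" is purely illustrative.
		tuned, _ := time.ParseDuration("90m")

		fmt.Printf("default: %s, tuned: %s\n", defaultTimeout, tuned)
		fmt.Printf("resulting flag: --max-node-provision-time=%s\n", "90m")
	}
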
Steven Hardy committed Aug 5, 2020
1 parent d4e2b49 commit 6cde7f9
Showing 4 changed files with 22 additions and 0 deletions.
8 changes: 8 additions & 0 deletions install/01_clusterautoscaler.crd.yaml
@@ -49,6 +49,14 @@ spec:
                feature flag. Should CA ignore DaemonSet pods when calculating resource
                utilization for scaling down. false by default
              type: boolean
            maxNodeProvisionTime:
              description: Maximum time CA waits for node to be provisioned.
                Expects an unsigned duration string of decimal
                numbers each with optional fraction and a unit
                suffix, eg "300ms", "1.5h" or "2h45m". Valid time
                units are "ns", "us" (or "µs"), "ms", "s", "m", "h".
              pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
              type: string
            maxPodGracePeriod:
              description: Gives pods graceful termination time before scaling down
              format: int32
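
As a cross-check on the schema above, a standalone Go sketch (not part of the
commit) that validates candidate values against the CRD pattern and against Go's
time.ParseDuration; the pattern is stricter, since ParseDuration also accepts
signed durations and a bare "0":

	package main

	import (
		"fmt"
		"regexp"
		"time"
	)

	// Pattern copied from the CRD schema above.
	var maxNodeProvisionTimeRE = regexp.MustCompile(`^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$`)

	func validate(v string) error {
		if !maxNodeProvisionTimeRE.MatchString(v) {
			return fmt.Errorf("%q does not match the CRD pattern", v)
		}
		if _, err := time.ParseDuration(v); err != nil {
			return fmt.Errorf("%q is not a parseable duration: %v", v, err)
		}
		return nil
	}

	func main() {
		for _, v := range []string{"2h45m", "300ms", "90m", "-10m", "ten minutes"} {
			fmt.Println(v, "=>", validate(v))
		}
	}
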
4 changes: 4 additions & 0 deletions pkg/apis/autoscaling/v1/clusterautoscaler_types.go
@@ -19,6 +19,10 @@ type ClusterAutoscalerSpec struct {
	// Gives pods graceful termination time before scaling down
	MaxPodGracePeriod *int32 `json:"maxPodGracePeriod,omitempty"`

	// Maximum time CA waits for node to be provisioned
	// +kubebuilder:validation:Pattern=([0-9]*(\.[0-9]*)?[a-z]+)+
	MaxNodeProvisionTime string `json:"maxNodeProvisionTime,omitempty"`

	// To allow users to schedule "best-effort" pods, which shouldn't trigger
	// Cluster Autoscaler actions, but only run when there are spare resources available,
	// More info: https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#how-does-cluster-autoscaler-work-with-pod-priority-and-preemption
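
A short sketch of setting the new field on the Go API type from client code; the
import path is inferred from the repository layout above and the object name is a
placeholder:

	package main

	import (
		"fmt"

		metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

		// Import path assumed from the repository layout; adjust to the actual module path.
		v1 "github.com/openshift/cluster-autoscaler-operator/pkg/apis/autoscaling/v1"
	)

	func main() {
		ca := &v1.ClusterAutoscaler{
			ObjectMeta: metav1.ObjectMeta{Name: "default"},
			Spec: v1.ClusterAutoscalerSpec{
				// Omit (leave "") to fall back to the CA's built-in default.
				MaxNodeProvisionTime: "30m",
			},
		}
		fmt.Println(ca.Spec.MaxNodeProvisionTime)
	}
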
6 changes: 6 additions & 0 deletions pkg/controller/clusterautoscaler/clusterautoscaler.go
@@ -44,6 +44,7 @@ const (
	ScaleDownDelayAfterFailureArg AutoscalerArg = "--scale-down-delay-after-failure"
	ScaleDownUnneededTimeArg      AutoscalerArg = "--scale-down-unneeded-time"
	MaxNodesTotalArg              AutoscalerArg = "--max-nodes-total"
	MaxNodeProvisionTimeArg       AutoscalerArg = "--max-node-provision-time"
	CoresTotalArg                 AutoscalerArg = "--cores-total"
	MemoryTotalArg                AutoscalerArg = "--memory-total"
	GPUTotalArg                   AutoscalerArg = "--gpu-total"
@@ -71,6 +72,11 @@ func AutoscalerArgs(ca *v1.ClusterAutoscaler, cfg *Config) []string {
		args = append(args, v)
	}

	if ca.Spec.MaxNodeProvisionTime != "" {
		v := MaxNodeProvisionTimeArg.Value(s.MaxNodeProvisionTime)
		args = append(args, v)
	}

	if ca.Spec.PodPriorityThreshold != nil {
		v := ExpendablePodsPriorityCutoffArg.Value(*s.PodPriorityThreshold)
		args = append(args, v)
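
To illustrate what the new branch emits, a self-contained sketch of how an
AutoscalerArg-style helper renders the flag; this is a simplified stand-in, as the
package's real Value implementation is defined elsewhere and may differ:

	package main

	import "fmt"

	// AutoscalerArg mirrors the pattern above: the constant is the flag name,
	// and Value renders "--flag=value".
	type AutoscalerArg string

	func (a AutoscalerArg) Value(v interface{}) string {
		return fmt.Sprintf("%s=%v", string(a), v)
	}

	const MaxNodeProvisionTimeArg AutoscalerArg = "--max-node-provision-time"

	func main() {
		var args []string
		maxNodeProvisionTime := "30m" // stands in for ClusterAutoscaler.Spec.MaxNodeProvisionTime
		if maxNodeProvisionTime != "" {
			args = append(args, MaxNodeProvisionTimeArg.Value(maxNodeProvisionTime))
		}
		fmt.Println(args) // [--max-node-provision-time=30m]
	}
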
4 changes: 4 additions & 0 deletions pkg/controller/clusterautoscaler/clusterautoscaler_test.go
@@ -34,6 +34,7 @@ const (
var (
	ScaleDownUnneededTime        = "10s"
	ScaleDownDelayAfterAdd       = "60s"
	MaxNodeProvisionTime         = "30m"
	PodPriorityThreshold   int32 = -10
	MaxPodGracePeriod      int32 = 60
	MaxNodesTotal          int32 = 100
@@ -146,6 +147,7 @@ func TestAutoscalerArgs(t *testing.T) {
	expectedMissing := []string{
		"--scale-down-delay-after-delete",
		"--scale-down-delay-after-failure",
		"--max-node-provision-time",
		"--balance-similar-node-groups",
		"--ignore-daemonsets-utilization",
		"--skip-nodes-with-local-storage",
@@ -165,13 +167,15 @@ func TestAutoscalerArgEnabled(t *testing.T) {
	ca.Spec.BalanceSimilarNodeGroups = pointer.BoolPtr(true)
	ca.Spec.IgnoreDaemonsetsUtilization = pointer.BoolPtr(true)
	ca.Spec.SkipNodesWithLocalStorage = pointer.BoolPtr(true)
	ca.Spec.MaxNodeProvisionTime = MaxNodeProvisionTime

	args := AutoscalerArgs(ca, &Config{CloudProvider: TestCloudProvider, Namespace: TestNamespace})

	expected := []string{
		fmt.Sprintf("--balance-similar-node-groups=true"),
		fmt.Sprintf("--ignore-daemonsets-utilization=true"),
		fmt.Sprintf("--skip-nodes-with-local-storage=true"),
		fmt.Sprintf("--max-node-provision-time=%s", MaxNodeProvisionTime),
	}

	for _, e := range expected {
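
The test assertions above check for flags by name; a standalone sketch (the
repository's actual test helpers may differ) of the kind of prefix-based
presence check that drives the expected/expectedMissing lists:

	package main

	import (
		"fmt"
		"strings"
	)

	// hasArgPrefix reports whether any generated argument starts with the flag name,
	// e.g. "--max-node-provision-time=30m" matches "--max-node-provision-time".
	func hasArgPrefix(args []string, flag string) bool {
		for _, a := range args {
			if strings.HasPrefix(a, flag) {
				return true
			}
		}
		return false
	}

	func main() {
		args := []string{"--max-node-provision-time=30m", "--max-nodes-total=100"}
		fmt.Println(hasArgPrefix(args, "--max-node-provision-time"))       // true
		fmt.Println(hasArgPrefix(args, "--scale-down-delay-after-delete")) // false
	}
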
