From 9418e8c24ce5e722f1f6ec3c0d517cccc994bdd6 Mon Sep 17 00:00:00 2001 From: shibataka000 Date: Sat, 9 Nov 2019 06:00:58 +0000 Subject: [PATCH] Fix bug about unintentional scale out during updating deployment. During rolling update with maxSurge=1 and maxUnavailable=0, len(metrics) is greater than currentReplcas and it may cause unintentional scale out. --- .../podautoscaler/replica_calculator.go | 8 +++++++- .../podautoscaler/replica_calculator_test.go | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/pkg/controller/podautoscaler/replica_calculator.go b/pkg/controller/podautoscaler/replica_calculator.go index 85557a9820bf..01bc0b18185e 100644 --- a/pkg/controller/podautoscaler/replica_calculator.go +++ b/pkg/controller/podautoscaler/replica_calculator.go @@ -134,9 +134,15 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti return currentReplicas, utilization, rawUtilization, timestamp, nil } + newReplicas := int32(math.Ceil(newUsageRatio * float64(len(metrics)))) + if (newUsageRatio < 1.0 && newReplicas > currentReplicas) || (newUsageRatio > 1.0 && newReplicas < currentReplicas) { + // return the current replicas if the change of metrics length would cause a change in scale direction + return currentReplicas, utilization, rawUtilization, timestamp, nil + } + // return the result, where the number of replicas considered is // however many replicas factored into our calculation - return int32(math.Ceil(newUsageRatio * float64(len(metrics)))), utilization, rawUtilization, timestamp, nil + return newReplicas, utilization, rawUtilization, timestamp, nil } // GetRawResourceReplicas calculates the desired replica count based on a target resource utilization (as a raw milli-value) diff --git a/pkg/controller/podautoscaler/replica_calculator_test.go b/pkg/controller/podautoscaler/replica_calculator_test.go index 7d392eab8026..245df1872fef 100644 --- a/pkg/controller/podautoscaler/replica_calculator_test.go +++ b/pkg/controller/podautoscaler/replica_calculator_test.go @@ -1247,6 +1247,24 @@ func TestReplicaCalcMissingMetricsUnreadyScaleDown(t *testing.T) { tc.runTest(t) } +func TestReplicaCalcDuringRollingUpdateWithMaxSurge(t *testing.T) { + tc := replicaCalcTestCase{ + currentReplicas: 2, + expectedReplicas: 2, + podPhase: []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodRunning}, + resource: &resourceInfo{ + name: v1.ResourceCPU, + requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + levels: []int64{100, 100}, + + targetUtilization: 50, + expectedUtilization: 10, + expectedValue: numContainersPerPod * 100, + }, + } + tc.runTest(t) +} + // TestComputedToleranceAlgImplementation is a regression test which // back-calculates a minimal percentage for downscaling based on a small percentage // increase in pod utilization which is calibrated against the tolerance value.