-
Notifications
You must be signed in to change notification settings - Fork 38.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[hpa] Parameterize tolerance, downscale, and upscale into HPAController, and add corresponding unit test for backsolved tolerance. #18315
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,7 @@ import ( | |
"encoding/json" | ||
"fmt" | ||
"io" | ||
"math" | ||
"testing" | ||
"time" | ||
|
||
|
@@ -32,9 +33,14 @@ import ( | |
"k8s.io/kubernetes/pkg/controller/podautoscaler/metrics" | ||
"k8s.io/kubernetes/pkg/runtime" | ||
|
||
glog "github.com/golang/glog" | ||
"github.com/stretchr/testify/assert" | ||
heapster "k8s.io/heapster/api/v1/types" | ||
) | ||
|
||
"github.com/stretchr/testify/assert" | ||
// unit tests need tolerance awareness to calibrate. | ||
const ( | ||
tolerance = .1 | ||
) | ||
|
||
func (w fakeResponseWrapper) DoRaw() ([]byte, error) { | ||
|
@@ -206,7 +212,7 @@ func (tc *testCase) verifyResults(t *testing.T) { | |
func (tc *testCase) runTest(t *testing.T) { | ||
testClient := tc.prepareTestClient(t) | ||
metricsClient := metrics.NewHeapsterMetricsClient(testClient, metrics.DefaultHeapsterNamespace, metrics.DefaultHeapsterScheme, metrics.DefaultHeapsterService, metrics.DefaultHeapsterPort) | ||
hpaController := NewHorizontalController(testClient, metricsClient) | ||
hpaController := NewHorizontalController(testClient, metricsClient, tolerance, time.Second, time.Second) | ||
err := hpaController.reconcileAutoscalers() | ||
assert.Equal(t, nil, err) | ||
if tc.verifyEvents { | ||
|
@@ -360,4 +366,64 @@ func TestEventNotCreated(t *testing.T) { | |
tc.runTest(t) | ||
} | ||
|
||
// TODO: add more tests | ||
// TestComputedToleranceAlgImplementation is a regression test which | ||
// back-calculates a minimal percentage for downscaling based on a small percentage | ||
// increase in pod utilization which is calibrated against the tolerance value. | ||
func TestComputedToleranceAlgImplementation(t *testing.T) { | ||
|
||
startPods := 10 | ||
// 150 mCPU per pod. | ||
totalUsedCPUOfAllPods := uint64(startPods * 150) | ||
// Each pod starts out asking for 2X what is really needed. | ||
// This means we will have a 50% ratio of used/requested | ||
totalRequestedCPUOfAllPods := 2 * totalUsedCPUOfAllPods | ||
requestedToUsed := float64(totalRequestedCPUOfAllPods / totalUsedCPUOfAllPods) | ||
// Spread the amount we ask over 10 pods. We can add some jitter later in reportedLevels. | ||
perPodRequested := int(totalRequestedCPUOfAllPods) / startPods | ||
|
||
// Force a minimal scaling event by satisfying (tolerance < 1 - resourcesUsedRatio). | ||
target := math.Abs(1/(requestedToUsed*(1-tolerance))) + .01 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jszczepkowski 1-tolerance works as well. it just changes the sign, which is a no-op since we're absolute-valuing. I've updated it from |
||
finalCpuPercentTarget := int(target * 100) | ||
resourcesUsedRatio := float64(totalUsedCPUOfAllPods) / float64(float64(totalRequestedCPUOfAllPods)*target) | ||
// the autoscaler will compare this vs. tolerance. Let's calculate the usageRatio, which will be | ||
// compared with tolerance. | ||
usageRatioToleranceValue := float64(1 - resourcesUsedRatio) | ||
// i.e. .60 * 20 -> scaled down expectation. | ||
finalPods := math.Ceil(resourcesUsedRatio * float64(startPods)) | ||
|
||
glog.Infof("To breach tolerance %f we will create a utilization ratio difference of %f", tolerance, usageRatioToleranceValue) | ||
tc := testCase{ | ||
minReplicas: 0, | ||
maxReplicas: 1000, | ||
initialReplicas: startPods, | ||
desiredReplicas: int(finalPods), | ||
CPUTarget: finalCpuPercentTarget, | ||
reportedLevels: []uint64{ | ||
totalUsedCPUOfAllPods / 10, | ||
totalUsedCPUOfAllPods / 10, | ||
totalUsedCPUOfAllPods / 10, | ||
totalUsedCPUOfAllPods / 10, | ||
totalUsedCPUOfAllPods / 10, | ||
totalUsedCPUOfAllPods / 10, | ||
totalUsedCPUOfAllPods / 10, | ||
totalUsedCPUOfAllPods / 10, | ||
totalUsedCPUOfAllPods / 10, | ||
totalUsedCPUOfAllPods / 10, | ||
}, | ||
reportedCPURequests: []resource.Quantity{ | ||
resource.MustParse(fmt.Sprint(perPodRequested+100) + "m"), | ||
resource.MustParse(fmt.Sprint(perPodRequested-100) + "m"), | ||
resource.MustParse(fmt.Sprint(perPodRequested+10) + "m"), | ||
resource.MustParse(fmt.Sprint(perPodRequested-10) + "m"), | ||
resource.MustParse(fmt.Sprint(perPodRequested+2) + "m"), | ||
resource.MustParse(fmt.Sprint(perPodRequested-2) + "m"), | ||
resource.MustParse(fmt.Sprint(perPodRequested+1) + "m"), | ||
resource.MustParse(fmt.Sprint(perPodRequested-1) + "m"), | ||
resource.MustParse(fmt.Sprint(perPodRequested) + "m"), | ||
resource.MustParse(fmt.Sprint(perPodRequested) + "m"), | ||
}, | ||
} | ||
tc.runTest(t) | ||
} | ||
|
||
// TODO: add more tests, e.g., enforcement of upscale/downscale window. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@wojtek-t @jszczepkowski this is the culprit. I made the default tolerance was way too high (off by a decimal)!
I will fix and re-validate this manually. which were the specific autoscale tests that it broke? just for good measure ill paste results in the PR ...