Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Autoscaling tests fix for 1.10 #70022

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
236 changes: 1 addition & 235 deletions test/e2e/autoscaling/cluster_size_autoscaling.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,16 +128,10 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
err = enableAutoscaler("default-pool", 3, 5)
framework.ExpectNoError(err)
}
Expect(getNAPNodePoolsNumber()).Should(Equal(0))
}
})

AfterEach(func() {
if framework.ProviderIs("gke") {
By("Remove changes introduced by NAP tests")
removeNAPNodePools()
disableAutoprovisioning()
}
By(fmt.Sprintf("Restoring initial size of the cluster"))
setMigSizes(originalSizes)
expectedNodes := 0
Expand Down Expand Up @@ -792,103 +786,6 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
framework.ExpectNoError(framework.WaitForReadyNodes(c, len(nodes.Items), nodesRecoverTimeout))
})

It("should add new node and new node pool on too big pod, scale down to 1 and scale down to 0 [Feature:ClusterSizeAutoscalingScaleWithNAP]", func() {
framework.SkipUnlessProviderIs("gke")
framework.ExpectNoError(enableAutoprovisioning(""))
By("Create first pod")
cleanupFunc1 := ReserveMemory(f, "memory-reservation1", 1, int(1.1*float64(memAllocatableMb)), true, defaultTimeout)
defer func() {
if cleanupFunc1 != nil {
cleanupFunc1()
}
}()
By("Waiting for scale up")
// Verify that cluster size increased.
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
func(size int) bool { return size == nodeCount+1 }, defaultTimeout))
By("Check if NAP group was created")
Expect(getNAPNodePoolsNumber()).Should(Equal(1))
By("Create second pod")
cleanupFunc2 := ReserveMemory(f, "memory-reservation2", 1, int(1.1*float64(memAllocatableMb)), true, defaultTimeout)
defer func() {
if cleanupFunc2 != nil {
cleanupFunc2()
}
}()
By("Waiting for scale up")
// Verify that cluster size increased.
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
func(size int) bool { return size == nodeCount+2 }, defaultTimeout))
By("Delete first pod")
cleanupFunc1()
cleanupFunc1 = nil
By("Waiting for scale down to 1")
// Verify that cluster size decreased.
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
func(size int) bool { return size == nodeCount+1 }, scaleDownTimeout))
By("Delete second pod")
cleanupFunc2()
cleanupFunc2 = nil
By("Waiting for scale down to 0")
// Verify that cluster size decreased.
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
func(size int) bool { return size == nodeCount }, scaleDownTimeout))
By("Waiting for NAP group remove")
framework.ExpectNoError(waitTillAllNAPNodePoolsAreRemoved())
By("Check if NAP group was removeed")
Expect(getNAPNodePoolsNumber()).Should(Equal(0))
})

It("shouldn't add new node group if not needed [Feature:ClusterSizeAutoscalingScaleWithNAP]", func() {
framework.SkipUnlessProviderIs("gke")
framework.ExpectNoError(enableAutoprovisioning(""))
By("Create pods")
// Create nodesCountAfterResize+1 pods allocating 0.7 allocatable on present nodes. One more node will have to be created.
cleanupFunc := ReserveMemory(f, "memory-reservation", nodeCount+1, int(float64(nodeCount+1)*float64(0.7)*float64(memAllocatableMb)), true, scaleUpTimeout)
defer cleanupFunc()
By("Waiting for scale up")
// Verify that cluster size increased.
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
func(size int) bool { return size >= nodeCount+1 }, scaleUpTimeout))
By("Check if NAP group was created hoping id didn't happen")
Expect(getNAPNodePoolsNumber()).Should(Equal(0))
})

It("shouldn't scale up if cores limit too low, should scale up after limit is changed [Feature:ClusterSizeAutoscalingScaleWithNAP]", func() {
framework.SkipUnlessProviderIs("gke")
By(fmt.Sprintf("Set core limit to %d", coreCount))
framework.ExpectNoError(enableAutoprovisioning(fmt.Sprintf(`"resource_limits":{"name":"cpu", "minimum":2, "maximum":%d}, "resource_limits":{"name":"memory", "minimum":0, "maximum":10000000}`, coreCount)))
// Create pod allocating 1.1 allocatable for present nodes. Bigger node will have to be created.
cleanupFunc := ReserveMemory(f, "memory-reservation", 1, int(1.1*float64(memAllocatableMb)), false, time.Second)
defer cleanupFunc()
By(fmt.Sprintf("Waiting for scale up hoping it won't happen, sleep for %s", scaleUpTimeout.String()))
time.Sleep(scaleUpTimeout)
// Verify that cluster size is not changed
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
func(size int) bool { return size == nodeCount }, time.Second))
By("Change resource limits")
framework.ExpectNoError(enableAutoprovisioning(fmt.Sprintf(`"resource_limits":{"name":"cpu", "minimum":2, "maximum":%d}, "resource_limits":{"name":"memory", "minimum":0, "maximum":10000000}`, coreCount+5)))
By("Wait for scale up")
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
func(size int) bool { return size == nodeCount+1 }, scaleUpTimeout))
By("Check if NAP group was created")
Expect(getNAPNodePoolsNumber()).Should(Equal(1))
})

It("should create new node if there is no node for node selector [Feature:ClusterSizeAutoscalingScaleWithNAP]", func() {
framework.SkipUnlessProviderIs("gke")
framework.ExpectNoError(enableAutoprovisioning(""))
// Create pod allocating 0.7 allocatable for present nodes with node selector.
cleanupFunc := ReserveMemoryWithSelector(f, "memory-reservation", 1, int(0.7*float64(memAllocatableMb)), true, scaleUpTimeout, map[string]string{"test": "test"})
defer cleanupFunc()
By("Waiting for scale up")
// Verify that cluster size increased.
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
func(size int) bool { return size == nodeCount+1 }, defaultTimeout))
By("Check if NAP group was created")
Expect(getNAPNodePoolsNumber()).Should(Equal(1))
})

It("shouldn't scale up when expendable pod is created [Feature:ClusterSizeAutoscalingScaleUp]", func() {
// TODO(krzysztof_jastrzebski): Start running this test on GKE when Pod Priority and Preemption is in beta.
framework.SkipUnlessProviderIs("gce")
Expand Down Expand Up @@ -1140,17 +1037,6 @@ func disableAutoscaler(nodePool string, minCount, maxCount int) error {
return fmt.Errorf("autoscaler still enabled, last error: %v", finalErr)
}

func isAutoprovisioningEnabled() (bool, error) {
strBody, err := getCluster("v1alpha1")
if err != nil {
return false, err
}
if strings.Contains(strBody, "\"enableNodeAutoprovisioning\": true") {
return true, nil
}
return false, nil
}

func executeHTTPRequest(method string, url string, body string) (string, error) {
client := &http.Client{}
req, err := http.NewRequest(method, url, strings.NewReader(body))
Expand All @@ -1170,126 +1056,6 @@ func executeHTTPRequest(method string, url string, body string) (string, error)
return string(respBody), nil
}

func enableAutoprovisioning(resourceLimits string) error {
By("Using API to enable autoprovisioning.")
var body string
if resourceLimits != "" {
body = fmt.Sprintf(`{"update": {"desired_cluster_autoscaling": {"enable_node_autoprovisioning": true, %s}}}`, resourceLimits)
} else {
body = `{"update": {"desired_cluster_autoscaling": {"enable_node_autoprovisioning": true, "resource_limits":{"name":"cpu", "minimum":0, "maximum":100}, "resource_limits":{"name":"memory", "minimum":0, "maximum":10000000}}}}`
}
_, err := executeHTTPRequest(http.MethodPut, getGKEClusterURL("v1alpha1"), body)
if err != nil {
glog.Errorf("Request error: %s", err.Error())
return err
}
glog.Infof("Wait for enabling autoprovisioning.")
for start := time.Now(); time.Since(start) < gkeUpdateTimeout; time.Sleep(30 * time.Second) {
enabled, err := isAutoprovisioningEnabled()
if err != nil {
glog.Errorf("Error: %s", err.Error())
return err
}
if enabled {
By("Autoprovisioning enabled.")
return nil
}
glog.Infof("Waiting for enabling autoprovisioning")
}
return fmt.Errorf("autoprovisioning wasn't enabled (timeout).")
}

func disableAutoprovisioning() error {
enabled, err := isAutoprovisioningEnabled()
if err != nil {
glog.Errorf("Error: %s", err.Error())
return err
}
if !enabled {
By("Autoprovisioning disabled.")
return nil
}
By("Using API to disable autoprovisioning.")
_, err = executeHTTPRequest(http.MethodPut, getGKEClusterURL("v1alpha1"), "{\"update\": {\"desired_cluster_autoscaling\": {}}}")
if err != nil {
glog.Errorf("Request error: %s", err.Error())
return err
}
By("Wait for disabling autoprovisioning.")
for start := time.Now(); time.Since(start) < gkeUpdateTimeout; time.Sleep(30 * time.Second) {
enabled, err := isAutoprovisioningEnabled()
if err != nil {
glog.Errorf("Error: %s", err.Error())
return err
}
if !enabled {
By("Autoprovisioning disabled.")
return nil
}
By("Waiting for disabling autoprovisioning")
}
return fmt.Errorf("autoprovisioning wasn't disabled (timeout).")
}

func getNAPNodePools() ([]string, error) {
if framework.ProviderIs("gke") {
args := []string{"container", "node-pools", "list", "--cluster=" + framework.TestContext.CloudConfig.Cluster}
output, err := execCmd(getGcloudCommand(args)...).CombinedOutput()
if err != nil {
glog.Errorf("Failed to get instance groups: %v", string(output))
return nil, err
}
re := regexp.MustCompile("nap.* ")
lines := re.FindAllString(string(output), -1)
for i, line := range lines {
lines[i] = line[:strings.Index(line, " ")]
}
return lines, nil
} else {
return nil, fmt.Errorf("provider does not support NAP")
}
}

func removeNAPNodePools() error {
By("Remove NAP node pools")
pools, err := getNAPNodePools()
if err != nil {
return err
}
for _, pool := range pools {
By("Remove node pool: " + pool)
suffix := fmt.Sprintf("projects/%s/zones/%s/clusters/%s/nodePools/%s",
framework.TestContext.CloudConfig.ProjectID,
framework.TestContext.CloudConfig.Zone,
framework.TestContext.CloudConfig.Cluster,
pool)
_, err := executeHTTPRequest(http.MethodDelete, getGKEURL("v1alpha1", suffix), "")
if err != nil {
glog.Errorf("Request error: %s", err.Error())
return err
}
}
err = waitTillAllNAPNodePoolsAreRemoved()
if err != nil {
glog.Errorf(fmt.Sprintf("Couldn't remove NAP groups: %s", err.Error()))
}
return err
}

func getNAPNodePoolsNumber() int {
groups, err := getNAPNodePools()
framework.ExpectNoError(err)
return len(groups)
}

func waitTillAllNAPNodePoolsAreRemoved() error {
By("Wait till all NAP node pools are removed")
err := wait.PollImmediate(5*time.Second, defaultTimeout, func() (bool, error) {
return getNAPNodePoolsNumber() == 0, nil
})
return err
}

func addNodePool(name string, machineType string, numNodes int) {
args := []string{"container", "node-pools", "create", name, "--quiet",
"--machine-type=" + machineType,
Expand Down Expand Up @@ -1853,7 +1619,7 @@ type scaleUpStatus struct {
// Try to get timestamp from status.
// Status configmap is not parsing-friendly, so evil regexpery follows.
func getStatusTimestamp(status string) (time.Time, error) {
timestampMatcher, err := regexp.Compile("Cluster-autoscaler status at \\s*([0-9\\-]+ [0-9]+:[0-9]+:[0-9]+\\.[0-9]+ \\+[0-9]+ [A-Za-z]+):")
timestampMatcher, err := regexp.Compile("Cluster-autoscaler status at \\s*([0-9\\-]+ [0-9]+:[0-9]+:[0-9]+\\.[0-9]+ \\+[0-9]+ [A-Za-z]+)")
if err != nil {
return time.Time{}, err
}
Expand Down