Add tests for ignoring scheduler processing #121783
Changes from all commits: 6073d1c, 73565cd, 80fbc4c, ec89154
@@ -92,6 +92,8 @@ const (
	highPriorityClassName = "high-priority"
	gpuLabel = "cloud.google.com/gke-accelerator"
	nonExistingBypassedSchedulerName = "non-existing-bypassed-scheduler"
)

var _ = SIGDescribe("Cluster size autoscaling", framework.WithSlow(), func() {
@@ -1000,6 +1002,52 @@ var _ = SIGDescribe("Cluster size autoscaling", framework.WithSlow(), func() {
		framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet,
			func(size int) bool { return size == increasedSize }, time.Second))
	})

	f.It("should scale up when unprocessed pod is created and is going to be unschedulable", feature.ClusterScaleUpBypassScheduler, func(ctx context.Context) {
		// 70% of allocatable memory of a single node * replica count, forcing a scale up in case of normal pods
		replicaCount := 2 * nodeCount
		reservedMemory := int(float64(replicaCount) * float64(0.7) * float64(memAllocatableMb))
		cleanupFunc := ReserveMemoryWithSchedulerName(ctx, f, "memory-reservation", replicaCount, reservedMemory, false, 1, nonExistingBypassedSchedulerName)
		defer framework.ExpectNoError(cleanupFunc())
		// Verify that cluster size is increased
		ginkgo.By("Waiting for cluster scale-up")
		sizeFunc := func(size int) bool {
			// Softly checks scale-up since other types of machines can be added which would affect #nodes
			return size > nodeCount
		}
		framework.ExpectNoError(WaitForClusterSizeFuncWithUnready(ctx, f.ClientSet, sizeFunc, scaleUpTimeout, 0))
	})
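The sizing comment above is easier to follow with concrete numbers. The figures below are hypothetical (the real nodeCount and memAllocatableMb depend on the test cluster and are not part of this PR); the point is that each replica requests 70% of one node's allocatable memory and there are twice as many replicas as nodes, so at most one replica fits per existing node and the rest can only be placed after a scale-up.

```go
// Worked example with made-up cluster numbers, only to illustrate the sizing
// logic of the test above; nodeCount and memAllocatableMb are assumptions.
package main

import "fmt"

func main() {
	nodeCount := 3           // assumed initial node count
	memAllocatableMb := 7500 // assumed allocatable memory per node, in MB

	replicaCount := 2 * nodeCount // 6 replicas
	reservedMemory := int(float64(replicaCount) * 0.7 * float64(memAllocatableMb))
	perReplica := reservedMemory / replicaCount

	fmt.Println(perReplica)                   // 5250 MB per pod: 70% of a node, so at most one pod per node
	fmt.Println(reservedMemory)               // 31500 MB requested in total
	fmt.Println(nodeCount * memAllocatableMb) // 22500 MB allocatable before any scale-up
}
```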
f.It("shouldn't scale up when unprocessed pod is created and is going to be schedulable", feature.ClusterScaleUpBypassScheduler, func(ctx context.Context) { | ||
// 50% of allocatable memory of a single node, so that no scale up would trigger in normal cases | ||
replicaCount := 1 | ||
reservedMemory := int(float64(0.5) * float64(memAllocatableMb)) | ||
cleanupFunc := ReserveMemoryWithSchedulerName(ctx, f, "memory-reservation", replicaCount, reservedMemory, false, 1, nonExistingBypassedSchedulerName) | ||
defer framework.ExpectNoError(cleanupFunc()) | ||
// Verify that cluster size is the same | ||
ginkgo.By(fmt.Sprintf("Waiting for scale up hoping it won't happen, polling cluster size for %s", scaleUpTimeout.String())) | ||
sizeFunc := func(size int) bool { | ||
return size == nodeCount | ||
} | ||
gomega.Consistently(ctx, func() error { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consistently or Eventually? https://onsi.github.io/gomega/#eventually |
||
return WaitForClusterSizeFunc(ctx, f.ClientSet, sizeFunc, time.Second) | ||
}).WithTimeout(scaleUpTimeout).WithPolling(framework.Poll).ShouldNot(gomega.HaveOccurred()) | ||
}) | ||
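On the reviewer's question: Eventually passes as soon as its condition succeeds once within the window, so it could return early even if a scale-up still happened later; Consistently requires the condition to hold for the entire window, which matches the intent of "no scale-up should occur for scaleUpTimeout". A minimal standalone sketch of the difference, assuming only stock Gomega (the size check is a stand-in, not the PR's WaitForClusterSizeFunc):

```go
// Standalone sketch (not PR code): how Eventually and Consistently differ for a
// "cluster size must stay at N" style check. The size lookup is a stand-in.
package main

import (
	"errors"
	"fmt"
	"time"

	"github.com/onsi/gomega"
)

func main() {
	g := gomega.NewGomega(func(message string, _ ...int) {
		fmt.Println("assertion failed:", message)
	})

	clusterSizeIs := func(want int) error {
		size := 3 // stand-in for querying the real node count
		if size != want {
			return errors.New("unexpected cluster size")
		}
		return nil
	}

	// Eventually: succeeds as soon as the function returns nil once within the window.
	g.Eventually(func() error { return clusterSizeIs(3) }).
		WithTimeout(2 * time.Second).WithPolling(200 * time.Millisecond).
		Should(gomega.Succeed())

	// Consistently: the function must keep returning nil for the whole window,
	// which is what "no scale-up happens during scaleUpTimeout" needs.
	g.Consistently(func() error { return clusterSizeIs(3) }).
		WithTimeout(2 * time.Second).WithPolling(200 * time.Millisecond).
		Should(gomega.Succeed())
}
```

For an error-returning function, Should(gomega.Succeed()) and ShouldNot(gomega.HaveOccurred()), as used in the PR, assert the same thing: the returned error is nil.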
f.It("shouldn't scale up when unprocessed pod is created and scheduler is not specified to be bypassed", feature.ClusterScaleUpBypassScheduler, func(ctx context.Context) { | ||
// 70% of allocatable memory of a single node * replica count, forcing a scale up in case of normal pods | ||
replicaCount := 2 * nodeCount | ||
reservedMemory := int(float64(replicaCount) * float64(0.7) * float64(memAllocatableMb)) | ||
schedulerName := "non-existent-scheduler-" + f.UniqueName | ||
cleanupFunc := ReserveMemoryWithSchedulerName(ctx, f, "memory-reservation", replicaCount, reservedMemory, false, 1, schedulerName) | ||
defer framework.ExpectNoError(cleanupFunc()) | ||
// Verify that cluster size is the same | ||
ginkgo.By(fmt.Sprintf("Waiting for scale up hoping it won't happen, polling cluster size for %s", scaleUpTimeout.String())) | ||
sizeFunc := func(size int) bool { | ||
return size == nodeCount | ||
} | ||
gomega.Consistently(ctx, func() error { | ||
return WaitForClusterSizeFunc(ctx, f.ClientSet, sizeFunc, time.Second) | ||
}).WithTimeout(scaleUpTimeout).WithPolling(framework.Poll).ShouldNot(gomega.HaveOccurred()) | ||
}) | ||
}) | ||
|
||
func installNvidiaDriversDaemonSet(ctx context.Context, f *framework.Framework) { | ||
|
@@ -1298,7 +1346,7 @@ func getPoolSize(ctx context.Context, f *framework.Framework, poolName string) i
	return size
}

func reserveMemory(ctx context.Context, f *framework.Framework, id string, replicas, megabytes int, expectRunning bool, timeout time.Duration, selector map[string]string, tolerations []v1.Toleration, priorityClassName string) func() error {
func reserveMemory(ctx context.Context, f *framework.Framework, id string, replicas, megabytes int, expectRunning bool, timeout time.Duration, selector map[string]string, tolerations []v1.Toleration, priorityClassName, schedulerName string) func() error {
	ginkgo.By(fmt.Sprintf("Running RC which reserves %v MB of memory", megabytes))
	request := int64(1024 * 1024 * megabytes / replicas)
	config := &testutils.RCConfig{

@@ -1312,6 +1360,7 @@ func reserveMemory(ctx context.Context, f *framework.Framework, id string, repli
		NodeSelector: selector,
		Tolerations: tolerations,
		PriorityClassName: priorityClassName,
		SchedulerName: schedulerName,
	}
	for start := time.Now(); time.Since(start) < rcCreationRetryTimeout; time.Sleep(rcCreationRetryDelay) {
		err := e2erc.RunRC(ctx, *config)
@@ -1333,19 +1382,25 @@ func reserveMemory(ctx context.Context, f *framework.Framework, id string, repli
// ReserveMemoryWithPriority creates a replication controller with pods with priority that, in summation,
// request the specified amount of memory.
func ReserveMemoryWithPriority(ctx context.Context, f *framework.Framework, id string, replicas, megabytes int, expectRunning bool, timeout time.Duration, priorityClassName string) func() error {
	return reserveMemory(ctx, f, id, replicas, megabytes, expectRunning, timeout, nil, nil, priorityClassName)
	return reserveMemory(ctx, f, id, replicas, megabytes, expectRunning, timeout, nil, nil, priorityClassName, "")
}

// ReserveMemoryWithSelectorAndTolerations creates a replication controller with pods with node selector that, in summation,
// request the specified amount of memory.
func ReserveMemoryWithSelectorAndTolerations(ctx context.Context, f *framework.Framework, id string, replicas, megabytes int, expectRunning bool, timeout time.Duration, selector map[string]string, tolerations []v1.Toleration) func() error {
	return reserveMemory(ctx, f, id, replicas, megabytes, expectRunning, timeout, selector, tolerations, "")
	return reserveMemory(ctx, f, id, replicas, megabytes, expectRunning, timeout, selector, tolerations, "", "")
}

// ReserveMemoryWithSchedulerName creates a replication controller with pods with scheduler name that, in summation,
// request the specified amount of memory.
func ReserveMemoryWithSchedulerName(ctx context.Context, f *framework.Framework, id string, replicas, megabytes int, expectRunning bool, timeout time.Duration, schedulerName string) func() error {
	return reserveMemory(ctx, f, id, replicas, megabytes, expectRunning, timeout, nil, nil, "", schedulerName)
}

// ReserveMemory creates a replication controller with pods that, in summation,
// request the specified amount of memory.
func ReserveMemory(ctx context.Context, f *framework.Framework, id string, replicas, megabytes int, expectRunning bool, timeout time.Duration) func() error {
	return reserveMemory(ctx, f, id, replicas, megabytes, expectRunning, timeout, nil, nil, "")
	return reserveMemory(ctx, f, id, replicas, megabytes, expectRunning, timeout, nil, nil, "", "")
}

// WaitForClusterSizeFunc waits until the cluster size matches the given function.
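For readers who have not seen the SchedulerName plumbing before: the value passed through RCConfig is expected to end up as spec.schedulerName on the reserved pods. When it names a scheduler that is not running, no scheduler ever binds the pods, so they stay Pending, and the new tests use that to exercise how the autoscaler treats such pods. A minimal sketch of a pod spec with such a name; the image, sizes, and names below are illustrative, not taken from the PR:

```go
// Illustrative sketch (not PR code) of a pod whose spec.schedulerName points at
// a scheduler that does not exist, so the default scheduler ignores it and the
// pod stays Pending. Names, image, and sizes are assumptions for the example.
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func memoryReservationPod(schedulerName string, memoryMb int64) *v1.Pod {
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "memory-reservation"},
		Spec: v1.PodSpec{
			// The field that RCConfig.SchedulerName feeds in the PR.
			SchedulerName: schedulerName,
			Containers: []v1.Container{{
				Name:  "pause",
				Image: "registry.k8s.io/pause:3.9",
				Resources: v1.ResourceRequirements{
					Requests: v1.ResourceList{
						v1.ResourceMemory: *resource.NewQuantity(memoryMb*1024*1024, resource.BinarySI),
					},
				},
			}},
		},
	}
}

func main() {
	pod := memoryReservationPod("non-existing-bypassed-scheduler", 5250)
	fmt.Println(pod.Spec.SchedulerName) // nothing schedules this pod, so it stays Pending
}
```

Whether such Pending pods should trigger a scale-up then depends on whether the autoscaler is configured to bypass that scheduler name and on whether the pods would actually fit, which is exactly the distinction the three new tests draw.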
Second changed file:
@@ -132,6 +132,7 @@ var (
	Windows = framework.WithFeature(framework.ValidFeatures.Add("Windows"))
	WindowsHostProcessContainers = framework.WithFeature(framework.ValidFeatures.Add("WindowsHostProcessContainers"))
	WindowsHyperVContainers = framework.WithFeature(framework.ValidFeatures.Add("WindowsHyperVContainers"))
	ClusterScaleUpBypassScheduler = framework.WithFeature(framework.ValidFeatures.Add("ClusterScaleUpBypassScheduler"))
)

func init() {

Review comment (on the ClusterScaleUpBypassScheduler line): In the future please keep this in alphabetical order (#123260 will make that more obvious).
Review comment: This test should have been marked as "slow" because it blocks for 5 minutes (= scaleUpTimeout).
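The surrounding suite is already tagged framework.WithSlow() at the SIGDescribe level (visible at the top of this diff). As far as I know that tag boils down to a Ginkgo "Slow" label on the specs, so the reviewer's suggestion amounts to making sure the new specs carry that label and can be excluded, for example with a run using --label-filter='!Slow'. A small self-contained sketch of the underlying Ginkgo mechanism (not PR code; the label names are shown only for illustration):

```go
// Self-contained sketch (not PR code) of Ginkgo's label mechanism, which the
// e2e framework's slow/feature tags build on. Label names here are illustrative.
package example

import (
	"testing"
	"time"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
)

func TestExample(t *testing.T) {
	gomega.RegisterFailHandler(ginkgo.Fail)
	ginkgo.RunSpecs(t, "autoscaling example suite")
}

var _ = ginkgo.Describe("cluster scale-up", ginkgo.Label("Feature:ClusterScaleUpBypassScheduler"), func() {
	// The "Slow" label lets a run with --label-filter='!Slow' skip this spec.
	ginkgo.It("blocks for the whole polling window", ginkgo.Label("Slow"), func() {
		time.Sleep(50 * time.Millisecond) // stand-in for waiting out scaleUpTimeout
	})
})
```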