Merge pull request #121783 from atwamahmoud/ignore-scheduler-tests
Add tests for ignoring scheduler processing
k8s-ci-robot committed Jan 17, 2024
2 parents 05780d5 + ec89154 commit b26f6df
Showing 3 changed files with 63 additions and 5 deletions.
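The behavior under test: a pod whose spec.schedulerName names a scheduler that is not running is never picked up by kube-scheduler and stays Pending indefinitely, with no scheduling events. When the cluster autoscaler is configured to treat that scheduler name as bypassed (in recent cluster-autoscaler releases this is the --bypassed-scheduler-names flag; treat the flag name as an assumption here, it is not part of this commit), it counts such pods as unschedulable and scales up for them without waiting for scheduler processing. A minimal, self-contained sketch of such a pod using client-go; the pod name, image, and memory request are illustrative:

package main

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	// Illustration only: connect via the default kubeconfig.
	cfg, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		panic(err)
	}
	client := kubernetes.NewForConfigOrDie(cfg)

	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "bypassed-pod", Namespace: "default"},
		Spec: v1.PodSpec{
			// No scheduler with this name runs in the cluster, so
			// kube-scheduler ignores the pod and it stays Pending.
			SchedulerName: "non-existing-bypassed-scheduler",
			Containers: []v1.Container{{
				Name:  "pause",
				Image: "registry.k8s.io/pause:3.9",
				Resources: v1.ResourceRequirements{
					Requests: v1.ResourceList{
						// Large enough that an autoscaler acting on the pod
						// would need to add capacity.
						v1.ResourceMemory: resource.MustParse("1Gi"),
					},
				},
			}},
		},
	}
	if _, err := client.CoreV1().Pods("default").Create(context.TODO(), pod, metav1.CreateOptions{}); err != nil {
		panic(err)
	}
	fmt.Println("created pod with bypassed scheduler name")
}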
63 changes: 59 additions & 4 deletions test/e2e/autoscaling/cluster_size_autoscaling.go
@@ -92,6 +92,8 @@ const (
 	highPriorityClassName = "high-priority"
 
 	gpuLabel = "cloud.google.com/gke-accelerator"
+
+	nonExistingBypassedSchedulerName = "non-existing-bypassed-scheduler"
 )
 
 var _ = SIGDescribe("Cluster size autoscaling", framework.WithSlow(), func() {
@@ -1000,6 +1002,52 @@ var _ = SIGDescribe("Cluster size autoscaling", framework.WithSlow(), func() {
 		framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet,
 			func(size int) bool { return size == increasedSize }, time.Second))
 	})
+
+	f.It("should scale up when unprocessed pod is created and is going to be unschedulable", feature.ClusterScaleUpBypassScheduler, func(ctx context.Context) {
+		// 70% of a single node's allocatable memory * replica count, forcing a scale-up in the case of normal pods
+		replicaCount := 2 * nodeCount
+		reservedMemory := int(float64(replicaCount) * float64(0.7) * float64(memAllocatableMb))
+		cleanupFunc := ReserveMemoryWithSchedulerName(ctx, f, "memory-reservation", replicaCount, reservedMemory, false, 1, nonExistingBypassedSchedulerName)
+		defer framework.ExpectNoError(cleanupFunc())
+		// Verify that cluster size is increased
+		ginkgo.By("Waiting for cluster scale-up")
+		sizeFunc := func(size int) bool {
+			// Softly checks scale-up since other types of machines can be added, which would affect the node count
+			return size > nodeCount
+		}
+		framework.ExpectNoError(WaitForClusterSizeFuncWithUnready(ctx, f.ClientSet, sizeFunc, scaleUpTimeout, 0))
+	})
+	f.It("shouldn't scale up when unprocessed pod is created and is going to be schedulable", feature.ClusterScaleUpBypassScheduler, func(ctx context.Context) {
+		// 50% of a single node's allocatable memory, so that no scale-up triggers in the normal case
+		replicaCount := 1
+		reservedMemory := int(float64(0.5) * float64(memAllocatableMb))
+		cleanupFunc := ReserveMemoryWithSchedulerName(ctx, f, "memory-reservation", replicaCount, reservedMemory, false, 1, nonExistingBypassedSchedulerName)
+		defer framework.ExpectNoError(cleanupFunc())
+		// Verify that cluster size stays the same
+		ginkgo.By(fmt.Sprintf("Waiting for scale-up, hoping it won't happen; polling cluster size for %s", scaleUpTimeout.String()))
+		sizeFunc := func(size int) bool {
+			return size == nodeCount
+		}
+		gomega.Consistently(ctx, func() error {
+			return WaitForClusterSizeFunc(ctx, f.ClientSet, sizeFunc, time.Second)
+		}).WithTimeout(scaleUpTimeout).WithPolling(framework.Poll).ShouldNot(gomega.HaveOccurred())
+	})
+	f.It("shouldn't scale up when unprocessed pod is created and scheduler is not specified to be bypassed", feature.ClusterScaleUpBypassScheduler, func(ctx context.Context) {
+		// 70% of a single node's allocatable memory * replica count, forcing a scale-up in the case of normal pods
+		replicaCount := 2 * nodeCount
+		reservedMemory := int(float64(replicaCount) * float64(0.7) * float64(memAllocatableMb))
+		schedulerName := "non-existent-scheduler-" + f.UniqueName
+		cleanupFunc := ReserveMemoryWithSchedulerName(ctx, f, "memory-reservation", replicaCount, reservedMemory, false, 1, schedulerName)
+		defer framework.ExpectNoError(cleanupFunc())
+		// Verify that cluster size stays the same
+		ginkgo.By(fmt.Sprintf("Waiting for scale-up, hoping it won't happen; polling cluster size for %s", scaleUpTimeout.String()))
+		sizeFunc := func(size int) bool {
+			return size == nodeCount
+		}
+		gomega.Consistently(ctx, func() error {
+			return WaitForClusterSizeFunc(ctx, f.ClientSet, sizeFunc, time.Second)
+		}).WithTimeout(scaleUpTimeout).WithPolling(framework.Poll).ShouldNot(gomega.HaveOccurred())
+	})
 })
 
 func installNvidiaDriversDaemonSet(ctx context.Context, f *framework.Framework) {
@@ -1298,7 +1346,7 @@ func getPoolSize(ctx context.Context, f *framework.Framework, poolName string) int {
 	return size
 }
 
-func reserveMemory(ctx context.Context, f *framework.Framework, id string, replicas, megabytes int, expectRunning bool, timeout time.Duration, selector map[string]string, tolerations []v1.Toleration, priorityClassName string) func() error {
+func reserveMemory(ctx context.Context, f *framework.Framework, id string, replicas, megabytes int, expectRunning bool, timeout time.Duration, selector map[string]string, tolerations []v1.Toleration, priorityClassName, schedulerName string) func() error {
 	ginkgo.By(fmt.Sprintf("Running RC which reserves %v MB of memory", megabytes))
 	request := int64(1024 * 1024 * megabytes / replicas)
 	config := &testutils.RCConfig{
@@ -1312,6 +1360,7 @@ func reserveMemory(ctx context.Context, f *framework.Framework, id string, replicas, megabytes int, expectRunning bool, timeout time.Duration, selector map[string]string, tolerations []v1.Toleration, priorityClassName string) func() error {
 		NodeSelector:      selector,
 		Tolerations:       tolerations,
 		PriorityClassName: priorityClassName,
+		SchedulerName:     schedulerName,
 	}
 	for start := time.Now(); time.Since(start) < rcCreationRetryTimeout; time.Sleep(rcCreationRetryDelay) {
 		err := e2erc.RunRC(ctx, *config)
@@ -1333,19 +1382,25 @@ func reserveMemory(ctx context.Context, f *framework.Framework, id string, replicas, megabytes int, expectRunning bool, timeout time.Duration, selector map[string]string, tolerations []v1.Toleration, priorityClassName string) func() error {
 // ReserveMemoryWithPriority creates a replication controller with pods with priority that, in summation,
 // request the specified amount of memory.
 func ReserveMemoryWithPriority(ctx context.Context, f *framework.Framework, id string, replicas, megabytes int, expectRunning bool, timeout time.Duration, priorityClassName string) func() error {
-	return reserveMemory(ctx, f, id, replicas, megabytes, expectRunning, timeout, nil, nil, priorityClassName)
+	return reserveMemory(ctx, f, id, replicas, megabytes, expectRunning, timeout, nil, nil, priorityClassName, "")
 }
 
 // ReserveMemoryWithSelectorAndTolerations creates a replication controller with pods with node selector that, in summation,
 // request the specified amount of memory.
 func ReserveMemoryWithSelectorAndTolerations(ctx context.Context, f *framework.Framework, id string, replicas, megabytes int, expectRunning bool, timeout time.Duration, selector map[string]string, tolerations []v1.Toleration) func() error {
-	return reserveMemory(ctx, f, id, replicas, megabytes, expectRunning, timeout, selector, tolerations, "")
+	return reserveMemory(ctx, f, id, replicas, megabytes, expectRunning, timeout, selector, tolerations, "", "")
 }
+
+// ReserveMemoryWithSchedulerName creates a replication controller with pods with scheduler name that, in summation,
+// request the specified amount of memory.
+func ReserveMemoryWithSchedulerName(ctx context.Context, f *framework.Framework, id string, replicas, megabytes int, expectRunning bool, timeout time.Duration, schedulerName string) func() error {
+	return reserveMemory(ctx, f, id, replicas, megabytes, expectRunning, timeout, nil, nil, "", schedulerName)
+}
 
 // ReserveMemory creates a replication controller with pods that, in summation,
 // request the specified amount of memory.
 func ReserveMemory(ctx context.Context, f *framework.Framework, id string, replicas, megabytes int, expectRunning bool, timeout time.Duration) func() error {
-	return reserveMemory(ctx, f, id, replicas, megabytes, expectRunning, timeout, nil, nil, "")
+	return reserveMemory(ctx, f, id, replicas, megabytes, expectRunning, timeout, nil, nil, "", "")
 }
 
 // WaitForClusterSizeFunc waits until the cluster size matches the given function.
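One note on the cleanup pattern in the new specs: reserveMemory returns a func() error that deletes the replication controller, and Go evaluates a deferred call's arguments at the point of the defer statement, so defer framework.ExpectNoError(cleanupFunc()) runs cleanupFunc() immediately; only ExpectNoError itself is deferred. The usual way to postpone cleanup until the spec exits is to defer a closure, as in this sketch (same identifiers as the diff above):

// Reserve memory via pods bound to the bypassed scheduler name, then
// delete the replication controller only when the spec returns.
cleanupFunc := ReserveMemoryWithSchedulerName(ctx, f, "memory-reservation", replicaCount, reservedMemory, false, 1, nonExistingBypassedSchedulerName)
defer func() {
	framework.ExpectNoError(cleanupFunc())
}()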
1 change: 1 addition & 0 deletions test/e2e/feature/feature.go
@@ -132,6 +132,7 @@ var (
 	Windows                       = framework.WithFeature(framework.ValidFeatures.Add("Windows"))
 	WindowsHostProcessContainers  = framework.WithFeature(framework.ValidFeatures.Add("WindowsHostProcessContainers"))
 	WindowsHyperVContainers       = framework.WithFeature(framework.ValidFeatures.Add("WindowsHyperVContainers"))
+	ClusterScaleUpBypassScheduler = framework.WithFeature(framework.ValidFeatures.Add("ClusterScaleUpBypassScheduler"))
 )
 
 func init() {
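The feature.go entry registers ClusterScaleUpBypassScheduler with the framework's set of valid feature labels, which is what lets the specs above pass feature.ClusterScaleUpBypassScheduler to f.It. The label is rendered into the spec text (conventionally as [Feature:ClusterScaleUpBypassScheduler]) so CI jobs can include or exclude the tests with a label filter. A reduced sketch of the tagging, with a hypothetical suite and spec name:

var _ = SIGDescribe("Cluster size autoscaling bypass", func() {
	f := framework.NewDefaultFramework("autoscaling-bypass")
	f.It("scales up for pods assigned to a bypassed scheduler", feature.ClusterScaleUpBypassScheduler, func(ctx context.Context) {
		// spec body elided
	})
})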
4 changes: 3 additions & 1 deletion test/utils/runners.go
@@ -134,6 +134,7 @@ type RCConfig struct {
 	PriorityClassName             string
 	TerminationGracePeriodSeconds *int64
 	Lifecycle                     *v1.Lifecycle
+	SchedulerName                 string
 
 	// Env vars, set the same for every pod.
 	Env map[string]string
@@ -615,7 +616,8 @@ func (config *RCConfig) create() error {
 				Annotations: config.Annotations,
 			},
 			Spec: v1.PodSpec{
-				Affinity: config.Affinity,
+				SchedulerName: config.SchedulerName,
+				Affinity:      config.Affinity,
 				Containers: []v1.Container{
 					{
 						Name: config.Name,
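With SchedulerName plumbed into the pod template, every pod created by the runner carries the configured scheduler name; callers that leave the field empty get the zero value, which the API server defaults to the regular scheduler, so existing tests are unchanged. A reduced sketch of driving the runner directly (assumes the surrounding e2e context; field values are illustrative):

// RCConfig.SchedulerName flows into each pod's spec.schedulerName; the
// empty string is defaulted by the API server to "default-scheduler".
config := &testutils.RCConfig{
	Client:        f.ClientSet,
	Name:          "memory-reservation",
	Namespace:     f.Namespace.Name,
	Image:         imageutils.GetPauseImageName(),
	Replicas:      3,
	MemRequest:    500 * 1024 * 1024, // bytes per pod
	SchedulerName: "non-existing-bypassed-scheduler",
}
if err := e2erc.RunRC(ctx, *config); err != nil {
	// Expected here: RunRC waits for the pods to become running, and pods on a
	// bypassed scheduler never schedule, which is why the specs above pass
	// expectRunning=false to the reserveMemory helper instead.
	framework.Logf("RunRC: %v", err)
}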
