kubernetes-sigs · k8s-ci-robot · May 13, 2024 · May 8, 2024 · May 9, 2024 · May 9, 2024
diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go
@@ -30,7 +30,6 @@ import (
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/sets"
-	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/client-go/tools/record"
 	"k8s.io/klog/v2"
 	"k8s.io/utils/field"
@@ -51,6 +50,7 @@ import (
 	"sigs.k8s.io/kueue/pkg/util/priority"
 	"sigs.k8s.io/kueue/pkg/util/resource"
 	"sigs.k8s.io/kueue/pkg/util/routine"
+	"sigs.k8s.io/kueue/pkg/util/wait"
 	"sigs.k8s.io/kueue/pkg/workload"
 )
 
@@ -125,7 +125,7 @@ func New(queues *queue.Manager, cache *cache.Cache, cl client.Client, recorder r
 func (s *Scheduler) Start(ctx context.Context) error {
 	log := ctrl.LoggerFrom(ctx).WithName("scheduler")
 	ctx = ctrl.LoggerInto(ctx, log)
-	go wait.UntilWithContext(ctx, s.schedule, 0)
+	// Run the scheduling loop in the background. UntilWithBackoff keeps calling
+	// s.schedule until ctx is done and applies a short backoff whenever a run
+	// returns wait.SlowDown.
+	go wait.UntilWithBackoff(ctx, s.schedule)
 	return nil
 }
 
@@ -180,15 +180,15 @@ func (cu *cohortsUsage) hasCommonFlavorResources(cohort string, assignment cache
 	return false
 }
 
-func (s *Scheduler) schedule(ctx context.Context) {
+func (s *Scheduler) schedule(ctx context.Context) wait.SpeedSignal {
 	log := ctrl.LoggerFrom(ctx)
 
 	// 1. Get the heads from the queues, including their desired clusterQueue.
 	// This operation blocks while the queues are empty.
 	headWorkloads := s.queues.Heads(ctx)
 	// If there are no elements, it means that the program is finishing.
 	if len(headWorkloads) == 0 {
-		return
+		return wait.KeepGoing
 	}
 	startTime := time.Now()
 
@@ -295,6 +295,10 @@ func (s *Scheduler) schedule(ctx context.Context) {
 		}
 	}
 	metrics.AdmissionAttempt(result, time.Since(startTime))
+	if result != metrics.AdmissionResultSuccess {
+		return wait.SlowDown
+	}
+	return wait.KeepGoing
 }
 
 type entryStatus string

diff --git a/pkg/util/wait/backoff.go b/pkg/util/wait/backoff.go
@@ -0,0 +1,87 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package wait
+
+import (
+	"context"
+	"time"
+
+	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/utils/clock"
+)
+
+// UntilWithBackoff runs f in a loop until ctx indicates it is finished. The
+// SpeedSignal returned by each run of f decides the pacing: KeepGoing clears
+// any backoff, while consecutive SlowDown results apply a delay that doubles
+// on every run, ranging from 1ms to 100ms.
+func UntilWithBackoff(ctx context.Context, f func(context.Context) SpeedSignal) {
+	// Create and drain the timer, allowing reuse of the same timer via
+	// timer.Reset.
+	timer := clock.RealClock{}.NewTimer(0)
+	<-timer.C()
+	untilWithBackoff(ctx, f, timer)
+}
+
+// untilWithBackoff is the implementation behind UntilWithBackoff. It takes the
+// timer as a parameter so that callers, such as tests, can supply their own.
+// It loops through wait.BackoffUntil until ctx.Done() is closed and feeds the
+// SpeedSignal of every run of f into a speedyBackoffManager that starts out
+// with no backoff.
+func untilWithBackoff(ctx context.Context, f func(context.Context) SpeedSignal, timer clock.Timer) {
+	mgr := speedyBackoffManager{
+		backoff: noBackoff,
+		timer:   timer,
+	}
+	// NOTE(review): the `false` argument is BackoffUntil's sliding flag. With it
+	// unset, the timer is presumably obtained from mgr.Backoff() before f runs,
+	// so the signal of one run only shapes the delay armed by the next
+	// iteration; confirm against the apimachinery wait documentation.
+	wait.BackoffUntil(func() {
+		mgr.toggleBackoff(f(ctx))
+	}, &mgr, false, ctx.Done())
+}
+
+// SpeedSignal indicates whether we should run the function again immediately,
+// or apply backoff.
+type SpeedSignal bool
+
+const (
+	// KeepGoing signals to continue immediately.
+	KeepGoing SpeedSignal = true
+	// SlowDown signals to back off.
+	SlowDown SpeedSignal = false
+
+	// noBackoff is the zero delay used while no backoff is in effect.
+	noBackoff      = time.Millisecond * 0
+	// initialBackoff is the delay set by the first SlowDown after a period
+	// without backoff.
+	initialBackoff = time.Millisecond * 1
+	// maxBackoff is the upper bound the doubling delay is capped at.
+	maxBackoff     = time.Millisecond * 100
+)
+
+// toggleBackoff updates the stored delay according to speedSignal. KeepGoing
+// resets it to noBackoff. SlowDown sets it to initialBackoff when no backoff is
+// currently in effect; if one already is, the value is left untouched because
+// Backoff doubles it on every call.
+func (s *speedyBackoffManager) toggleBackoff(speedSignal SpeedSignal) {
+	switch speedSignal {
+	case KeepGoing:
+		s.backoff = noBackoff
+	case SlowDown:
+		if s.backoff == noBackoff {
+			s.backoff = initialBackoff
+		}
+	}
+}
+
+// speedyBackoffManager is a wait.BackoffManager whose delay is switched on and
+// off through toggleBackoff and doubled by Backoff.
+type speedyBackoffManager struct {
+	// backoff is the delay the next call to Backoff arms the timer with.
+	backoff time.Duration
+	// timer is the single timer that Backoff resets and returns on every call.
+	timer   clock.Timer
+}
+
+// Compile-time check that *speedyBackoffManager implements wait.BackoffManager.
+var _ wait.BackoffManager = (*speedyBackoffManager)(nil)
+
+// Backoff resets the manager's timer to the current delay and returns it. It
+// then doubles the stored delay for the following call, capping it at
+// maxBackoff. While the delay is noBackoff the doubling has no effect, so the
+// timer keeps being reset to zero.
+func (s *speedyBackoffManager) Backoff() clock.Timer {
+	s.timer.Reset(s.backoff)
+	s.backoff *= 2
+	if s.backoff > maxBackoff {
+		s.backoff = maxBackoff
+	}
+	return s.timer
+}
diff --git a/pkg/util/wait/backoff_test.go b/pkg/util/wait/backoff_test.go
@@ -0,0 +1,110 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package wait
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/google/go-cmp/cmp"
+	"k8s.io/utils/clock"
+	"k8s.io/utils/ptr"
+)
+
+// SpyTimer wraps a clock.Timer and records every duration passed to Reset.
+type SpyTimer struct {
+	// history points at the recorded durations. It is a pointer so that the
+	// value-receiver Reset method appends to the slice the test later inspects.
+	history *[]time.Duration
+	clock.Timer
+}
+
+// Reset appends d to the recorded history and then resets the wrapped timer
+// with a zero duration instead of d, so the test does not wait for the
+// requested backoff.
+func (s SpyTimer) Reset(d time.Duration) bool {
+	*s.history = append(*s.history, d)
+	return s.Timer.Reset(0)
+}
+
+// makeSpyTimer returns a SpyTimer with an empty history, wrapping a real timer
+// that has been created with a zero duration and already drained.
+func makeSpyTimer() SpyTimer {
+	timer := clock.RealClock{}.NewTimer(0)
+	<-timer.C()
+	return SpyTimer{history: ptr.To([]time.Duration{}), Timer: timer}
+}
+
+// ms returns m milliseconds as a time.Duration; m is used as a plain count.
+func ms(m time.Duration) time.Duration {
+	return time.Millisecond * m
+}
+
+// TestUntilWithBackoff drives untilWithBackoff with a scripted sequence of
+// SpeedSignals and compares the durations the loop passed to the timer's Reset
+// against the expected backoff schedule. Every expected list starts with ms(0)
+// and holds one entry more than signals, because f is invoked one extra time to
+// cancel the context.
+func TestUntilWithBackoff(t *testing.T) {
+	type TestCase struct {
+		name     string
+		signals  []SpeedSignal
+		expected []time.Duration
+	}
+
+	testCases := []TestCase{
+		{
+			name:     "base case",
+			signals:  []SpeedSignal{},
+			expected: []time.Duration{ms(0)},
+		},
+		{
+			name:     "base SlowDown",
+			signals:  []SpeedSignal{SlowDown},
+			expected: []time.Duration{ms(0), ms(1)},
+		},
+		{
+			name:     "base KeepGoing",
+			signals:  []SpeedSignal{KeepGoing},
+			expected: []time.Duration{ms(0), ms(0)},
+		},
+		{
+			name:     "KeepGoing always returns 0",
+			signals:  []SpeedSignal{KeepGoing, KeepGoing, KeepGoing, KeepGoing},
+			expected: []time.Duration{ms(0), ms(0), ms(0), ms(0), ms(0)},
+		},
+		{
+			name:     "reset before reaching max backoff",
+			signals:  []SpeedSignal{SlowDown, SlowDown, SlowDown, KeepGoing, SlowDown, SlowDown, SlowDown, SlowDown, SlowDown, SlowDown, SlowDown, KeepGoing},
+			expected: []time.Duration{ms(0), ms(1), ms(2), ms(4), ms(0), ms(1), ms(2), ms(4), ms(8), ms(16), ms(32), ms(64), ms(0)},
+		},
+		{
+			name:     "double until max then reset",
+			signals:  []SpeedSignal{SlowDown, SlowDown, SlowDown, SlowDown, SlowDown, SlowDown, SlowDown, SlowDown, SlowDown, KeepGoing},
+			expected: []time.Duration{ms(0), ms(1), ms(2), ms(4), ms(8), ms(16), ms(32), ms(64), ms(100), ms(100), ms(0)},
+		},
+	}
+	for _, testCase := range testCases {
+		t.Run(testCase.name, func(t *testing.T) {
+			timer := makeSpyTimer()
+			ctx, cancel := context.WithCancel(context.Background())
+
+			// f replays the scripted signals one per call.
+			i := 0
+			f := func(ctx context.Context) SpeedSignal {
+				if i >= len(testCase.signals) {
+					// Script exhausted: cancel the context so that
+					// untilWithBackoff returns.
+					cancel()
+					return KeepGoing
+				}
+				signal := testCase.signals[i]
+				i++
+				return signal
+			}
+			untilWithBackoff(ctx, f, timer)
+
+			if diff := cmp.Diff(testCase.expected, *timer.history); diff != "" {
+				t.Errorf("Unexpected backoff time (-want,+got):\n%s", diff)
+			}
+		})
+	}
+}