-
Notifications
You must be signed in to change notification settings - Fork 1.2k
/
scaler.go
359 lines (312 loc) · 12.6 KB
/
scaler.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
/*
Copyright 2019 The Knative Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kpa
import (
"context"
"fmt"
"net/http"
"time"
"knative.dev/pkg/apis/duck"
"knative.dev/pkg/injection/clients/dynamicclient"
"knative.dev/pkg/logging"
network "knative.dev/networking/pkg"
"knative.dev/networking/pkg/apis/networking"
nv1a1 "knative.dev/networking/pkg/apis/networking/v1alpha1"
"knative.dev/networking/pkg/prober"
pkgnet "knative.dev/pkg/network"
"knative.dev/serving/pkg/activator"
pav1alpha1 "knative.dev/serving/pkg/apis/autoscaling/v1alpha1"
"knative.dev/serving/pkg/autoscaler/config/autoscalerconfig"
"knative.dev/serving/pkg/reconciler/autoscaling/config"
kparesources "knative.dev/serving/pkg/reconciler/autoscaling/kpa/resources"
aresources "knative.dev/serving/pkg/reconciler/autoscaling/resources"
"knative.dev/serving/pkg/resources"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/dynamic"
)
const (
scaleUnknown = -1
probePeriod = 1 * time.Second
probeTimeout = 45 * time.Second
// The time after which the PA will be re-enqueued.
// This number is small, since `handleScaleToZero` below will
// re-enqueue for the configured grace period.
reenqeuePeriod = 1 * time.Second
// TODO(#3456): Remove this buffer once KPA does pod failure diagnostics.
//
// KPA will scale the Deployment down to zero if it fails to activate after ProgressDeadlineSeconds,
// however, after ProgressDeadlineSeconds, the Deployment itself updates its status, which causes
// the Revision to re-reconcile and diagnose pod failures. If we use the same timeout here, we will
// race the Revision reconciler and scale down the pods before it can actually surface the pod errors.
// We should instead do pod failure diagnostics here immediately before scaling down the Deployment.
activationTimeoutBuffer = 30 * time.Second
)
var probeOptions = []interface{}{
prober.WithHeader(network.UserAgentKey, network.AutoscalingUserAgent),
prober.WithHeader(network.ProbeHeaderName, activator.Name),
prober.ExpectsBody(activator.Name),
prober.ExpectsStatusCodes([]int{http.StatusOK}),
}
// for mocking in tests
type asyncProber interface {
Offer(context.Context, string, interface{}, time.Duration, time.Duration, ...interface{}) bool
}
// scaler scales the target of a kpa-class PA up or down including scaling to zero.
type scaler struct {
psInformerFactory duck.InformerFactory
dynamicClient dynamic.Interface
transport http.RoundTripper
// For sync probes.
activatorProbe func(pa *pav1alpha1.PodAutoscaler, transport http.RoundTripper) (bool, error)
// For async probes.
probeManager asyncProber
enqueueCB func(interface{}, time.Duration)
}
// newScaler creates a scaler.
func newScaler(ctx context.Context, psInformerFactory duck.InformerFactory, enqueueCB func(interface{}, time.Duration)) *scaler {
logger := logging.FromContext(ctx)
transport := pkgnet.NewProberTransport()
ks := &scaler{
// Wrap it in a cache, so that we don't stamp out a new
// informer/lister each time.
psInformerFactory: psInformerFactory,
dynamicClient: dynamicclient.Get(ctx),
transport: transport,
// Production setup uses the default probe implementation.
activatorProbe: activatorProbe,
probeManager: prober.New(func(arg interface{}, success bool, err error) {
logger.Infof("Async prober is done for %v: success?: %v error: %v", arg, success, err)
// Re-enqueue the PA in any case. If the probe timed out to retry again, if succeeded to scale to 0.
enqueueCB(arg, reenqeuePeriod)
}, transport),
enqueueCB: enqueueCB,
}
return ks
}
// Resolves the pa to the probing endpoint Eg. http://hostname:port/healthz
func paToProbeTarget(pa *pav1alpha1.PodAutoscaler) string {
svc := pkgnet.GetServiceHostname(pa.Status.ServiceName, pa.Namespace)
port := networking.ServicePort(pa.Spec.ProtocolType)
return fmt.Sprintf("http://%s:%d/%s", svc, port, network.ProbePath)
}
// activatorProbe returns true if via probe it determines that the
// PA is backed by the Activator.
func activatorProbe(pa *pav1alpha1.PodAutoscaler, transport http.RoundTripper) (bool, error) {
// No service name -- no probe.
if pa.Status.ServiceName == "" {
return false, nil
}
return prober.Do(context.Background(), transport, paToProbeTarget(pa), probeOptions...)
}
func lastPodRetention(pa *pav1alpha1.PodAutoscaler, cfg *autoscalerconfig.Config) time.Duration {
d, ok := pa.ScaleToZeroPodRetention()
if ok {
return d
}
return cfg.ScaleToZeroPodRetentionPeriod
}
// pre: 0 <= min <= max && 0 <= x
func applyBounds(min, max, x int32) int32 {
if x < min {
return min
}
if max != 0 && x > max {
return max
}
return x
}
func durationMax(d1, d2 time.Duration) time.Duration {
if d1 < d2 {
return d2
}
return d1
}
func (ks *scaler) handleScaleToZero(ctx context.Context, pa *pav1alpha1.PodAutoscaler,
sks *nv1a1.ServerlessService, desiredScale int32) (int32, bool) {
if desiredScale != 0 {
return desiredScale, true
}
// We should only scale to zero when three of the following conditions are true:
// a) enable-scale-to-zero from configmap is true
// b) The PA has been active for at least the stable window, after which it
// gets marked inactive, and
// c) the PA has been backed by the Activator for at least the grace period
// of time.
// Alternatively, if (a) and the revision did not succeed to activate in
// `activationTimeout` time -- also scale it to 0.
cfgs := config.FromContext(ctx)
cfgAS := cfgs.Autoscaler
if !cfgAS.EnableScaleToZero {
return 1, true
}
cfgD := cfgs.Deployment
activationTimeout := cfgD.ProgressDeadline + activationTimeoutBuffer
now := time.Now()
logger := logging.FromContext(ctx)
switch {
case pa.Status.IsActivating(): // Active=Unknown
// If we are stuck activating for longer than our progress deadline, presume we cannot succeed and scale to 0.
if pa.Status.CanFailActivation(now, activationTimeout) {
logger.Info("Activation has timed out after ", activationTimeout)
return desiredScale, true
}
ks.enqueueCB(pa, activationTimeout)
return scaleUnknown, false
case pa.Status.IsActive(): // Active=True
// Don't scale-to-zero if the PA is active
// but return `(0, false)` to mark PA inactive, instead.
sw := aresources.StableWindow(pa, cfgAS)
af := pa.Status.ActiveFor(now)
if af >= sw {
// If SKS is in proxy mode, then there is high probability
// of SKS not changing its spec/status and thus not triggering
// a new reconciliation of PA.
if sks.Spec.Mode == nv1a1.SKSOperationModeProxy {
logger.Debug("SKS is already in proxy mode, auto-re-enqueue PA")
// Long enough to ensure current iteration is finished.
ks.enqueueCB(pa, 3*time.Second)
}
logger.Info("Can deactivate PA, was active for ", af)
return desiredScale, false
}
// Otherwise, scale down to at most 1 for the remainder of the idle period and then
// reconcile PA again.
logger.Infof("Sleeping additionally for %v before can scale to 0", sw-af)
ks.enqueueCB(pa, sw-af)
return 1, true
default: // Active=False
var (
err error
r bool = true
)
if resolveTBC(ctx, pa) != -1 {
// if TBC is -1 activator is guaranteed to already be in the path.
// Otherwise, probe to make sure Activator is in path.
r, err = ks.activatorProbe(pa, ks.transport)
logger.Infof("Probing activator = %v, err = %v", r, err)
}
if r {
// This enforces that the revision has been backed by the Activator for at least
// ScaleToZeroGracePeriod time.
// And at least ScaleToZeroPodRetentionPeriod since PA became inactive.
// Most conservative check, if it passes we're good.
lastPodTimeout := lastPodRetention(pa, cfgAS)
lastPodMaxTimeout := durationMax(cfgAS.ScaleToZeroGracePeriod, lastPodTimeout)
// If we have been inactive for this long, we can scale to 0!
if pa.Status.InactiveFor(now) >= lastPodMaxTimeout {
return desiredScale, true
}
// Now check last pod retention timeout. Since it's a hard deadline, regardless
// of network programming state we should circle back after that time period.
if lastPodTimeout > 0 {
if inactiveTime := pa.Status.InactiveFor(now); inactiveTime < lastPodTimeout {
logger.Infof("Can't scale to 0; InactiveFor %v < ScaleToZeroPodRetentionPeriod = %v",
inactiveTime, lastPodTimeout)
ks.enqueueCB(pa, lastPodTimeout-inactiveTime)
return desiredScale, false
}
logger.Debug("Last pod timeout satisfied")
}
// Otherwise check how long SKS was in proxy mode.
// Compute the difference between time we've been proxying with the timeout.
// If it's positive, that's the time we need to sleep, if negative -- we
// can scale to zero.
pf := sks.Status.ProxyFor()
to := cfgAS.ScaleToZeroGracePeriod - pf
if to <= 0 {
logger.Info("Fast path scaling to 0, in proxy mode for: ", pf)
return desiredScale, true
}
// Re-enqueue the PA for reconciliation with timeout of `to` to make sure we wait
// long enough.
logger.Info("Enqueueing PA after ", to)
ks.enqueueCB(pa, to)
return desiredScale, false
}
// Otherwise (any prober failure) start the async probe.
logger.Info("PA is not yet backed by activator, cannot scale to zero")
if !ks.probeManager.Offer(context.Background(), paToProbeTarget(pa), pa, probePeriod, probeTimeout, probeOptions...) {
logger.Info("Probe for revision is already in flight")
}
return desiredScale, false
}
}
func (ks *scaler) applyScale(ctx context.Context, pa *pav1alpha1.PodAutoscaler, desiredScale int32,
ps *pav1alpha1.PodScalable) error {
logger := logging.FromContext(ctx)
gvr, name, err := resources.ScaleResourceArguments(pa.Spec.ScaleTargetRef)
if err != nil {
return err
}
psNew := ps.DeepCopy()
psNew.Spec.Replicas = &desiredScale
patch, err := duck.CreatePatch(ps, psNew)
if err != nil {
return err
}
patchBytes, err := patch.MarshalJSON()
if err != nil {
return err
}
_, err = ks.dynamicClient.Resource(*gvr).Namespace(pa.Namespace).Patch(ctx, ps.Name, types.JSONPatchType,
patchBytes, metav1.PatchOptions{})
if err != nil {
return fmt.Errorf("failed to apply scale %d to scale target %s: %w", desiredScale, name, err)
}
logger.Debug("Successfully scaled to ", desiredScale)
return nil
}
// scale attempts to scale the given PA's target reference to the desired scale.
func (ks *scaler) scale(ctx context.Context, pa *pav1alpha1.PodAutoscaler, sks *nv1a1.ServerlessService, desiredScale int32) (int32, error) {
asConfig := config.FromContext(ctx).Autoscaler
logger := logging.FromContext(ctx)
if desiredScale < 0 && !pa.Status.IsActivating() {
logger.Debug("Metrics are not yet being collected.")
return desiredScale, nil
}
min, max := pa.ScaleBounds(asConfig)
initialScale := kparesources.GetInitialScale(asConfig, pa)
// Log reachability as quoted string, since default value is "".
logger.Debugf("MinScale = %d, MaxScale = %d, InitialScale = %d, DesiredScale = %d Reachable = %q",
min, max, initialScale, desiredScale, pa.Spec.Reachability)
// If initial scale has been attained, ignore the initialScale altogether.
if initialScale > 1 && !pa.Status.IsScaleTargetInitialized() {
// Ignore initial scale if minScale >= initialScale.
if min < initialScale {
logger.Debugf("Adjusting min to meet the initial scale: %d -> %d", min, initialScale)
}
min = intMax(initialScale, min)
}
if newScale := applyBounds(min, max, desiredScale); newScale != desiredScale {
logger.Debugf("Adjusting desiredScale to meet the min and max bounds before applying: %d -> %d", desiredScale, newScale)
desiredScale = newScale
}
desiredScale, shouldApplyScale := ks.handleScaleToZero(ctx, pa, sks, desiredScale)
if !shouldApplyScale {
return desiredScale, nil
}
ps, err := resources.GetScaleResource(ctx, pa.Namespace, pa.Spec.ScaleTargetRef, ks.psInformerFactory)
if err != nil {
return desiredScale, fmt.Errorf("failed to get scale target %v: %w", pa.Spec.ScaleTargetRef, err)
}
currentScale := int32(1)
if ps.Spec.Replicas != nil {
currentScale = *ps.Spec.Replicas
}
if desiredScale == currentScale {
return desiredScale, nil
}
logger.Infof("Scaling from %d to %d", currentScale, desiredScale)
return desiredScale, ks.applyScale(ctx, pa, desiredScale, ps)
}