/
recommender.go
658 lines (576 loc) · 31.4 KB
/
recommender.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
package recommender
import (
"context"
"errors"
"fmt"
"math"
"time"
v2 "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/record"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/log"
"github.com/mercari/tortoise/api/v1beta3"
"github.com/mercari/tortoise/pkg/event"
"github.com/mercari/tortoise/pkg/features"
hpaservice "github.com/mercari/tortoise/pkg/hpa"
"github.com/mercari/tortoise/pkg/utils"
)
// Service generates the autoscaling recommendations (vertical resource
// requests, HPA min/max replicas, and HPA target utilizations) that are
// stored in the Tortoise status.
type Service struct {
	// configurations

	// MaxReplicasRecommendationMultiplier is multiplied by the current replica
	// number to compute the maxReplicas recommendation.
	MaxReplicasRecommendationMultiplier float64
	// MinReplicasRecommendationMultiplier is multiplied by the current replica
	// number to compute the minReplicas recommendation.
	MinReplicasRecommendationMultiplier float64
	// eventRecorder emits Kubernetes events when a recommendation is updated.
	eventRecorder record.EventRecorder
	// minimumMinReplicas is the lower bound applied to the minReplicas recommendation.
	minimumMinReplicas int32
	// maximumTargetResourceUtilization is the upper bound (%) for any recommended HPA target utilization.
	maximumTargetResourceUtilization int32
	// minimumTargetResourceUtilization is the lower bound (%) for any recommended HPA target utilization.
	minimumTargetResourceUtilization int32
	// preferredMaxReplicas is the replica count at/above which Tortoise prefers
	// making containers bigger (vertical) over adding more replicas.
	preferredMaxReplicas int32
	// maxResourceSize is the cluster-wide maximum resource request per container.
	maxResourceSize corev1.ResourceList
	// the key is the container name, and "*" is the value for all containers.
	minResourceSizePerContainer map[string]corev1.ResourceList
	// maximumMaxReplica is the hard cap on replicas in this cluster; HPA
	// recommendations are skipped when the current replica number reaches it.
	maximumMaxReplica int32
	// featureFlags is the set of enabled feature flags (see pkg/features).
	featureFlags []features.FeatureFlag
	// maxAllowedScalingDownRatio is the max allowed scaling down ratio.
	// For example, if the current resource request is 100m, the max allowed scaling down ratio is 0.8,
	// the minimum resource request that Tortoise can apply is 80m.
	maxAllowedScalingDownRatio float64
	// bufferRatioOnVerticalResource is the extra buffer ratio (e.g. 0.1 = 10%)
	// added on top of the VPA recommendation for Vertical-policy resources.
	bufferRatioOnVerticalResource float64
}
// New constructs a recommendation Service.
//
// minimumCPUPerContainer / minimumMemoryPerContainer map container names to
// the minimum CPU/memory Tortoise may recommend for that container; the "*"
// entry is the cluster-wide default and is always derived from minCPU/minMemory
// (any caller-provided "*" entry is ignored, matching the historical behavior).
// Both maps may be nil. minCPU/minMemory/maxCPU/maxMemory must parse as
// resource.Quantity (e.g. "50m", "50Mi"); invalid values panic via MustParse.
func New(
	maxReplicasRecommendationMultiplier float64,
	minReplicasRecommendationMultiplier float64,
	maximumTargetResourceUtilization int,
	minimumTargetResourceUtilization int,
	minimumMinReplicas int,
	preferredMaxReplicas int,
	minCPU string,
	minMemory string,
	minimumCPUPerContainer map[string]string,
	minimumMemoryPerContainer map[string]string,
	maxCPU string,
	maxMemory string,
	maximumMaxReplica int32,
	maxAllowedScalingDownRatio float64,
	bufferRatioOnVerticalResourceRecommendation float64,
	featureFlags []features.FeatureFlag,
	eventRecorder record.EventRecorder,
) *Service {
	// Build the per-container minimum list in a local map instead of writing
	// into the caller-supplied maps: the previous implementation panicked when
	// either map was nil and surprisingly mutated the caller's maps.
	minResourceSizePerContainer := map[string]corev1.ResourceList{}
	for containerName, v := range minimumCPUPerContainer {
		if containerName == "*" {
			// The "*" entry is always derived from minCPU/minMemory below.
			continue
		}
		minResourceSizePerContainer[containerName] = corev1.ResourceList{
			corev1.ResourceCPU: resource.MustParse(v),
		}
	}
	for containerName, v := range minimumMemoryPerContainer {
		if containerName == "*" {
			continue
		}
		if _, ok := minResourceSizePerContainer[containerName]; !ok {
			minResourceSizePerContainer[containerName] = corev1.ResourceList{}
		}
		minResourceSizePerContainer[containerName][corev1.ResourceMemory] = resource.MustParse(v)
	}
	// "*" is the fallback applied to containers without an explicit override.
	minResourceSizePerContainer["*"] = corev1.ResourceList{
		corev1.ResourceCPU:    resource.MustParse(minCPU),
		corev1.ResourceMemory: resource.MustParse(minMemory),
	}
	return &Service{
		eventRecorder:                       eventRecorder,
		MaxReplicasRecommendationMultiplier: maxReplicasRecommendationMultiplier,
		MinReplicasRecommendationMultiplier: minReplicasRecommendationMultiplier,
		maximumTargetResourceUtilization:    int32(maximumTargetResourceUtilization),
		minimumTargetResourceUtilization:    int32(minimumTargetResourceUtilization),
		minimumMinReplicas:                  int32(minimumMinReplicas),
		preferredMaxReplicas:                int32(preferredMaxReplicas),
		minResourceSizePerContainer:         minResourceSizePerContainer,
		maxResourceSize: corev1.ResourceList{
			corev1.ResourceCPU:    resource.MustParse(maxCPU),
			corev1.ResourceMemory: resource.MustParse(maxMemory),
		},
		maximumMaxReplica:             maximumMaxReplica,
		featureFlags:                  featureFlags,
		maxAllowedScalingDownRatio:    maxAllowedScalingDownRatio,
		bufferRatioOnVerticalResource: bufferRatioOnVerticalResourceRecommendation,
	}
}
// updateVPARecommendation recalculates the per-container vertical resource
// recommendation and stores it in
// tortoise.Status.Recommendations.Vertical.ContainerResourceRecommendation.
// Even resources with a Horizontal policy can receive a vertical suggestion
// here (delegated to calculateBestNewSize). replicaNum is the current replica
// count; now is injected for testability.
func (s *Service) updateVPARecommendation(ctx context.Context, tortoise *v1beta3.Tortoise, hpa *v2.HorizontalPodAutoscaler, replicaNum int32, now time.Time) (*v1beta3.Tortoise, error) {
	scaledUpBasedOnPreferredMaxReplicas := false
	closeToPreferredMaxReplicas := false
	if hasHorizontal(tortoise) {
		// Handle TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas condition first.
		if replicaNum >= s.preferredMaxReplicas &&
			// If the current replica number is equal to the maximumMaxReplica,
			// increasing the resource request would not change the situation that the replica number is higher than preferredMaxReplicas.
			// NOTE(review): the code actually compares against hpa.Spec.MinReplicas, not maximumMaxReplica — confirm the comment above matches the intent.
			*hpa.Spec.MinReplicas < replicaNum &&
			features.Contains(s.featureFlags, features.VerticalScalingBasedOnPreferredMaxReplicas) &&
			allowVerticalScalingBasedOnPreferredMaxReplicas(tortoise, now) {
			c := utils.GetTortoiseCondition(tortoise, v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas)
			if c == nil || // no condition yet
				c.Status == v1.ConditionFalse {
				// It's the first time to notice that the current replica number is bigger than the preferred max replica number.
				// First 30min, we don't use VerticalScalingBasedOnPreferredMaxReplicas because this replica increase might be very temporal.
				// So, here we just change the condition to True, but doesn't trigger scaledUpBasedOnPreferredMaxReplicas.
				tortoise = utils.ChangeTortoiseCondition(tortoise, v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas, v1.ConditionTrue, "ScaledUpBasedOnPreferredMaxReplicas", "the current number of replicas is bigger than the preferred max replica number", now)
			} else {
				// The condition has already been True long enough (checked by
				// allowVerticalScalingBasedOnPreferredMaxReplicas), so actually trigger the vertical scale-up.
				// We keep increasing the size until we hit the maxResourceSize.
				tortoise = utils.ChangeTortoiseCondition(tortoise, v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas, v1.ConditionTrue, "ScaledUpBasedOnPreferredMaxReplicas", "the current number of replicas is bigger than the preferred max replica number", now)
				scaledUpBasedOnPreferredMaxReplicas = true
			}
		}
		if replicaNum < s.preferredMaxReplicas {
			// Change TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas to False.
			tortoise = utils.ChangeTortoiseCondition(tortoise, v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas, v1.ConditionFalse, "ScaledUpBasedOnPreferredMaxReplicas", "the current number of replicas is not bigger than the preferred max replica number", now)
		}
		// More than 80% of preferredMaxReplicas counts as "close"; calculateBestNewSize
		// then avoids shrinking requests (which could push the replica number up further).
		if int32(float64(s.preferredMaxReplicas)*0.8) < replicaNum {
			closeToPreferredMaxReplicas = true
		}
	}
	logger := log.FromContext(ctx)
	// requestMap: containerName -> resourceName -> current resource request.
	requestMap := map[string]map[corev1.ResourceName]resource.Quantity{}
	for _, r := range tortoise.Status.Conditions.ContainerResourceRequests {
		requestMap[r.ContainerName] = map[corev1.ResourceName]resource.Quantity{}
		for resourcename, value := range r.Resource {
			requestMap[r.ContainerName][resourcename] = value
		}
	}
	// recommendationMap: containerName -> resourceName -> max recommendation observed from VPA.
	recommendationMap := map[string]map[corev1.ResourceName]resource.Quantity{}
	for _, perContainer := range tortoise.Status.Conditions.ContainerRecommendationFromVPA {
		recommendationMap[perContainer.ContainerName] = map[corev1.ResourceName]resource.Quantity{}
		for k, perResource := range perContainer.MaxRecommendation {
			recommendationMap[perContainer.ContainerName][k] = perResource.Quantity
		}
	}
	// containerName → MinAllocatedResources
	minAllocatedResourcesMap := map[string]v1.ResourceList{}
	for _, r := range tortoise.Spec.ResourcePolicy {
		minAllocatedResourcesMap[r.ContainerName] = r.MinAllocatedResources
	}
	// containerName → MaxAllocatedResources
	maxAllocatedResourcesMap := map[string]v1.ResourceList{}
	for _, r := range tortoise.Spec.ResourcePolicy {
		maxAllocatedResourcesMap[r.ContainerName] = r.MaxAllocatedResources
	}
	newRecommendations := []v1beta3.RecommendedContainerResources{}
	for _, r := range tortoise.Status.AutoscalingPolicy {
		recommendation := v1beta3.RecommendedContainerResources{
			ContainerName:       r.ContainerName,
			RecommendedResource: map[corev1.ResourceName]resource.Quantity{},
		}
		for k, p := range r.Policy {
			reqmap, ok := requestMap[r.ContainerName]
			if !ok {
				// A missing request is only worth reporting when autoscaling is enabled for this resource.
				if p != v1beta3.AutoscalingTypeOff {
					logger.Error(nil, fmt.Sprintf("no resource request on the container %s, but the resource %s of this container has %s autoscaling policy", r.ContainerName, k, p))
				}
				continue
			}
			req, ok := reqmap[k]
			if !ok {
				if p != v1beta3.AutoscalingTypeOff {
					logger.Error(nil, fmt.Sprintf("no %s request on the container %s, but this resource has %s autoscaling policy", k, r.ContainerName, p))
				}
				continue
			}
			recomMap, ok := recommendationMap[r.ContainerName]
			if !ok {
				return tortoise, fmt.Errorf("no resource recommendation from VPA for the container %s", r.ContainerName)
			}
			recom, ok := recomMap[k]
			if !ok {
				return tortoise, fmt.Errorf("no %s recommendation from VPA for the container %s", k, r.ContainerName)
			}
			var newSize int64
			var reason string
			var err error
			newSize, reason, err = s.calculateBestNewSize(ctx, tortoise, p, r.ContainerName, recom, k, hpa, replicaNum, req, minAllocatedResourcesMap[r.ContainerName], maxAllocatedResourcesMap[r.ContainerName], scaledUpBasedOnPreferredMaxReplicas, closeToPreferredMaxReplicas)
			if err != nil {
				return tortoise, err
			}
			// Only emit an event when the recommendation actually changed (milli-value comparison).
			if newSize != req.MilliValue() {
				logger.Info("The recommendation of resource request in Tortoise is updated", "container name", r.ContainerName, "resource name", k, "reason", reason)
				s.eventRecorder.Event(tortoise, corev1.EventTypeNormal, event.RecommendationUpdated, fmt.Sprintf("The recommendation of %v request (%v) in Tortoise status is updated. Reason: %v", k, r.ContainerName, reason))
			} else {
				logger.Info("The recommendation of the container is not updated", "container name", r.ContainerName, "resource name", k, "reason", reason)
			}
			q := resource.NewMilliQuantity(newSize, req.Format)
			recommendation.RecommendedResource[k] = *q
		}
		newRecommendations = append(newRecommendations, recommendation)
	}
	tortoise.Status.Recommendations.Vertical.ContainerResourceRecommendation = newRecommendations
	return tortoise, nil
}
// allowVerticalScalingBasedOnPreferredMaxReplicas reports whether vertical
// scaling triggered by preferredMaxReplicas is currently allowed: it is
// blocked for the first 30 minutes after the
// ScaledUpBasedOnPreferredMaxReplicas condition turned True, because a fresh
// replica spike may be temporary.
func allowVerticalScalingBasedOnPreferredMaxReplicas(tortoise *v1beta3.Tortoise, now time.Time) bool {
	for _, cond := range tortoise.Status.Conditions.TortoiseConditions {
		if cond.Type != v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas || cond.Status != v1.ConditionTrue {
			continue
		}
		// Still inside the 30-minute cool-down window since the condition flipped to True.
		if now.Before(cond.LastTransitionTime.Add(30 * time.Minute)) {
			return false
		}
	}
	return true
}
// calculateBestNewSize calculates the best new resource request based on the current replica number and the recommended resource request.
// Even if the autoscaling policy is Horizontal, this function may suggest the vertical scaling, see comments in the function.
//
// It returns the new size in milli-units, a human-readable reason for the
// decision, and an error. Every returned size is passed through justifyNewSize,
// which clamps it to the allowed min/max range and the max allowed
// scaling-down ratio.
func (s *Service) calculateBestNewSize(
	ctx context.Context,
	tortoise *v1beta3.Tortoise,
	p v1beta3.AutoscalingType,
	containerName string,
	recommendedResourceRequest resource.Quantity,
	k corev1.ResourceName,
	hpa *v2.HorizontalPodAutoscaler,
	replicaNum int32,
	resourceRequest resource.Quantity,
	minAllocatedResources, maxAllocatedResources corev1.ResourceList,
	scaledUpBasedOnPreferredMaxReplicas, closeToPreferredMaxReplicas bool,
) (int64, string, error) {
	if p == v1beta3.AutoscalingTypeOff {
		// Just keep the current resource request.
		return resourceRequest.MilliValue(), "The autoscaling policy for this resource is Off", nil
	}
	if p == v1beta3.AutoscalingTypeVertical {
		// The user configures Vertical on this container's resource. This is just vertical scaling.
		// Basically we want to reduce the frequency of scaling up/down because vertical scaling has to restart deployment.
		// The ideal size is {VPA recommendation} * (1+buffer).
		idealSize := float64(recommendedResourceRequest.MilliValue()) * (1 + s.bufferRatioOnVerticalResource)
		if idealSize > float64(resourceRequest.MilliValue()) {
			// Scale up always happens when idealSize goes higher than the current resource request.
			// In this case, we don't just apply idealSize, but apply idealSize * (1+buffer)
			// so that we increase the resource request more than actually needed,
			// which reduces the need of scaling up in the future.
			idealSize = idealSize * (1 + s.bufferRatioOnVerticalResource)
			jastified := s.justifyNewSize(resourceRequest.MilliValue(), int64(idealSize), k, minAllocatedResources, maxAllocatedResources, containerName)
			return jastified, fmt.Sprintf("change %v request (%v) (%v → %v) based on VPA suggestion", k, containerName, resourceRequest.MilliValue(), jastified), nil
		}
		// Scale down - we ignore too small scale down to reduce the frequency of restarts.
		// previousIdealSize was the ideal size which was calculated when this resource request was applied.
		previousIdealSize := float64(resourceRequest.MilliValue()) / (1 + s.bufferRatioOnVerticalResource)
		if previousIdealSize*(1-s.bufferRatioOnVerticalResource) > idealSize {
			// The current ideal size is too small compared to the previous ideal size, so apply the scale down.
			jastified := s.justifyNewSize(resourceRequest.MilliValue(), int64(idealSize), k, minAllocatedResources, maxAllocatedResources, containerName)
			return jastified, fmt.Sprintf("change %v request (%v) (%v → %v) based on VPA suggestion", k, containerName, resourceRequest.MilliValue(), jastified), nil
		}
		// The scale down would be too small to be worth a restart; keep the current request.
		return resourceRequest.MilliValue(),
			fmt.Sprintf("Tortoise recommends %v as a new %v request (%v), but it's very small scale down change, so tortoise just ignores it", idealSize, k, containerName),
			nil
	}
	// p == v1beta3.AutoscalingTypeHorizontal
	// When the current replica num is more than or equal to the preferredMaxReplicas,
	// make the container size bigger (just multiple by 1.3) so that the replica number will be decreased.
	//
	// Here also covers the scenario where the current replica num hits MaximumMaxReplicas.
	if scaledUpBasedOnPreferredMaxReplicas {
		// We keep increasing the size until we hit the maxResourceSize.
		newSize := int64(float64(resourceRequest.MilliValue()) * 1.3)
		jastifiedNewSize := s.justifyNewSize(resourceRequest.MilliValue(), newSize, k, minAllocatedResources, maxAllocatedResources, containerName)
		msg := fmt.Sprintf("the current number of replicas (%v) is bigger than the preferred max replica number in this cluster (%v), so make %v request (%s) bigger (%v → %v)", replicaNum, s.preferredMaxReplicas, k, containerName, resourceRequest.MilliValue(), jastifiedNewSize)
		return jastifiedNewSize, msg, nil
	}
	if closeToPreferredMaxReplicas {
		// The current replica number is close or more than preferredMaxReplicas.
		// So, we just keep the current resource request
		// until the replica number goes lower
		// because scaling down the resource request might increase the replica number further more.
		return resourceRequest.MilliValue(), fmt.Sprintf("the current number of replicas is close to the preferred max replica number in this cluster, so keep the current resource request in %s in %s", k, containerName), nil
	}
	if replicaNum <= s.minimumMinReplicas {
		// The current replica number is less than or equal to the minimumMinReplicas.
		// The replica number is too small and hits the minReplicas.
		// So, the resource utilization might be super low because HPA cannot scale down further.
		// In this case, we'd like to reduce the resource request as much as possible so that the resource utilization will be higher.
		// And note that we don't increase the resource request even if VPA recommends it.
		// If the resource utilization goes up, HPA does scale up, not VPA.
		newSize := resourceRequest.MilliValue()
		if recommendedResourceRequest.MilliValue() < resourceRequest.MilliValue() {
			// We use the recommended resource request if it's smaller than the current resource request.
			newSize = recommendedResourceRequest.MilliValue()
		}
		jastified := s.justifyNewSize(resourceRequest.MilliValue(), newSize, k, minAllocatedResources, maxAllocatedResources, containerName)
		return jastified, fmt.Sprintf("the current number of replicas is equal or smaller than the minimum min replica number in this cluster (%v), so make %v request (%v) smaller (%v → %v) based on VPA suggestion", s.minimumMinReplicas, k, containerName, resourceRequest.MilliValue(), jastified), nil
	}
	// The replica number is OK based on minimumMinReplicas and preferredMaxReplicas.
	if !hasMultipleHorizontal(tortoise) || replicaNum == *hpa.Spec.MinReplicas {
		// Nothing else to do for a single-horizontal Tortoise.
		// Also, if the current replica number is equal to the minReplicas,
		// we don't change the resource request based on the current resource utilization
		// because even if the resource utilization is low, it's due to the minReplicas.
		return s.justifyNewSize(resourceRequest.MilliValue(), resourceRequest.MilliValue(), k, minAllocatedResources, maxAllocatedResources, containerName), "nothing to do", nil
	}
	targetUtilizationValue, err := hpaservice.GetHPATargetValue(ctx, hpa, containerName, k)
	if err != nil {
		return 0, "", fmt.Errorf("get the target value from HPA: %w", err)
	}
	// upperUtilization is the utilization (%) the VPA-recommended usage would
	// represent against the current request.
	upperUtilization := (float64(recommendedResourceRequest.MilliValue()) / float64(resourceRequest.MilliValue())) * 100
	// If upperUtilization is very close to targetUtilizationValue, we don't have to change the resource request.
	if float64(targetUtilizationValue)*0.9 > upperUtilization {
		// upperUtilization is much less than targetUtilizationValue, which seems weird in normal cases.
		// In this case, most likely the container size is unbalanced. (= we need multi-container specific optimization)
		// So, for example, when app:istio use the resource in the ratio of 1:5, but the resource request is 1:1,
		// the resource given to istio is always wasted. (since HPA is always kicked by the resource utilization of app)
		//
		// And this case, reducing the resource request of container in this kind of weird situation
		// so that the upper usage will be the target usage.
		newSize := int64(float64(recommendedResourceRequest.MilliValue()) * 100.0 / float64(targetUtilizationValue))
		jastified := s.justifyNewSize(resourceRequest.MilliValue(), newSize, k, minAllocatedResources, maxAllocatedResources, containerName)
		return jastified, fmt.Sprintf("the current resource usage (%v, %v%%) is too small and it's due to unbalanced container size, so make %v request (%v) smaller (%v → %v) based on VPA's recommendation and HPA target utilization %v%%", recommendedResourceRequest.MilliValue(), int(upperUtilization), k, containerName, resourceRequest.MilliValue(), jastified, targetUtilizationValue), nil
	}
	// Just keep the current resource request.
	// Only do justification.
	return s.justifyNewSize(resourceRequest.MilliValue(), resourceRequest.MilliValue(), k, minAllocatedResources, maxAllocatedResources, containerName), "nothing to do", nil
}
// hasHorizontal reports whether at least one resource of any container in the
// tortoise has the Horizontal autoscaling policy.
func hasHorizontal(tortoise *v1beta3.Tortoise) bool {
	for _, containerPolicy := range tortoise.Status.AutoscalingPolicy {
		for _, autoscalingType := range containerPolicy.Policy {
			if autoscalingType == v1beta3.AutoscalingTypeHorizontal {
				return true
			}
		}
	}
	return false
}
// hasMultipleHorizontal reports whether two or more resources (across all
// containers) in the tortoise have the Horizontal autoscaling policy.
func hasMultipleHorizontal(t *v1beta3.Tortoise) bool {
	horizontals := 0
	for _, containerPolicy := range t.Status.AutoscalingPolicy {
		for _, autoscalingType := range containerPolicy.Policy {
			if autoscalingType != v1beta3.AutoscalingTypeHorizontal {
				continue
			}
			horizontals++
			if horizontals > 1 {
				return true
			}
		}
	}
	return false
}
// getGlobalMinResourceSize returns the cluster-level minimum size for the
// resource k of the given container: the per-container override when it
// exists, otherwise the "*" default.
//
// Fix: previously, when a container had an override entry that lacked the
// requested resource (e.g. only a CPU minimum was configured), the lookup
// returned a zero Quantity instead of falling back to the "*" default,
// which silently disabled the global minimum for that resource in
// justifyNewSize. Now the fallback happens per resource.
func (s *Service) getGlobalMinResourceSize(k corev1.ResourceName, containerName string) resource.Quantity {
	if perContainer, ok := s.minResourceSizePerContainer[containerName]; ok {
		if q, ok := perContainer[k]; ok {
			return q
		}
		// The container has an override list, but not for this resource;
		// fall through to the cluster-wide default.
	}
	return s.minResourceSizePerContainer["*"][k]
}
// justifyNewSize clamps newSizeMilli (milli-units) into the allowed range for
// the resource k of the given container and returns the adjusted value.
// The lower bound is the larger of minAllocatedResources[k] and the
// cluster-level minimum, further raised to oldSizeMilli*maxAllowedScalingDownRatio
// so a single step can never shrink the request below that ratio.
// The upper bound is the smaller of maxAllocatedResources[k] (when set) and
// the cluster-level maximum.
func (s *Service) justifyNewSize(oldSizeMilli, newSizeMilli int64, k corev1.ResourceName, minAllocatedResources, maxAllocatedResources corev1.ResourceList, containerName string) int64 {
	upperBound := maxAllocatedResources[k]
	lowerBound := minAllocatedResources[k]
	// Bigger min requirement wins.
	if globalMin := s.getGlobalMinResourceSize(k, containerName); lowerBound.Cmp(globalMin) < 0 {
		lowerBound = globalMin
	}
	// Smaller max requirement wins; an unset (zero) per-tortoise maximum also
	// falls back to the cluster-level maximum.
	if upperBound.Cmp(s.maxResourceSize[k]) > 0 || upperBound.IsZero() {
		upperBound = s.maxResourceSize[k]
	}
	// Enforce maxAllowedScalingDownRatio: the request can never drop below
	// oldSizeMilli * ratio in one step, so raise the lower bound when needed.
	scaleDownFloor := int64(float64(oldSizeMilli) * s.maxAllowedScalingDownRatio)
	if lowerBound.MilliValue() < scaleDownFloor {
		lowerBound = ptr.Deref(resource.NewMilliQuantity(scaleDownFloor, lowerBound.Format), lowerBound)
	}
	switch {
	case newSizeMilli > upperBound.MilliValue():
		return upperBound.MilliValue()
	case newSizeMilli < lowerBound.MilliValue():
		return lowerBound.MilliValue()
	default:
		return newSizeMilli
	}
}
// updateHPARecommendation refreshes both halves of the HPA recommendation:
// first the per-container target utilizations, then the min/max replicas.
func (s *Service) updateHPARecommendation(ctx context.Context, tortoise *v1beta3.Tortoise, hpa *v2.HorizontalPodAutoscaler, replicaNum int32, now time.Time) (*v1beta3.Tortoise, error) {
	tortoise, err := s.updateHPATargetUtilizationRecommendations(ctx, tortoise, hpa, replicaNum)
	if err != nil {
		return tortoise, fmt.Errorf("update HPA target utilization recommendations: %w", err)
	}
	return s.updateHPAMinMaxReplicasRecommendations(tortoise, replicaNum, now)
}
// UpdateRecommendations recomputes every recommendation (HPA target
// utilization, HPA min/max replicas, and the vertical resource requests) and
// stores them in the tortoise status. During the Emergency / BackToNormal
// phases nothing is updated, because the abnormal replica count and the
// correspondingly low utilization would poison all three recommendations.
func (s *Service) UpdateRecommendations(ctx context.Context, tortoise *v1beta3.Tortoise, hpa *v2.HorizontalPodAutoscaler, replicaNum int32, now time.Time) (*v1beta3.Tortoise, error) {
	phase := tortoise.Status.TortoisePhase
	if phase == v1beta3.TortoisePhaseEmergency || phase == v1beta3.TortoisePhaseBackToNormal {
		// Skip everything: replicas are unusually high right now, so
		// min/max-replica, target-utilization, and VPA recommendations would
		// all be derived from distorted data.
		log.FromContext(ctx).Info("The recommendation of minReplica/maxReplica is not updated because of the emergency mode")
		return tortoise, nil
	}
	tortoise, err := s.updateHPARecommendation(ctx, tortoise, hpa, replicaNum, now)
	if err != nil {
		return tortoise, fmt.Errorf("update HPA recommendations: %w", err)
	}
	tortoise, err = s.updateVPARecommendation(ctx, tortoise, hpa, replicaNum, now)
	if err != nil {
		return tortoise, fmt.Errorf("update VPA recommendations: %w", err)
	}
	return tortoise, nil
}
// updateHPAMinMaxReplicasRecommendations updates the per-time-slot
// minReplicas and maxReplicas recommendations from the current replica count,
// scaled by the configured multipliers.
func (s *Service) updateHPAMinMaxReplicasRecommendations(tortoise *v1beta3.Tortoise, replicaNum int32, now time.Time) (*v1beta3.Tortoise, error) {
	replicas := float64(replicaNum)
	minRecommendation, err := s.updateReplicasRecommendation(
		int32(math.Ceil(replicas*s.MinReplicasRecommendationMultiplier)),
		tortoise.Status.Recommendations.Horizontal.MinReplicas,
		now,
		s.minimumMinReplicas,
	)
	if err != nil {
		return tortoise, fmt.Errorf("update MinReplicas recommendation: %w", err)
	}
	tortoise.Status.Recommendations.Horizontal.MinReplicas = minRecommendation
	// The floor of the max recommendation keeps the max/min ratio consistent
	// with the configured multipliers.
	maxFloor := int32(float64(s.minimumMinReplicas) * s.MaxReplicasRecommendationMultiplier / s.MinReplicasRecommendationMultiplier)
	maxRecommendation, err := s.updateReplicasRecommendation(
		int32(math.Ceil(replicas*s.MaxReplicasRecommendationMultiplier)),
		tortoise.Status.Recommendations.Horizontal.MaxReplicas,
		now,
		maxFloor,
	)
	if err != nil {
		return tortoise, fmt.Errorf("update MaxReplicas recommendation: %w", err)
	}
	tortoise.Status.Recommendations.Horizontal.MaxReplicas = maxRecommendation
	return tortoise, nil
}
// findSlotInReplicasRecommendation returns the index of the recommendation
// slot whose [From, To) hour range (and optional WeekDay) contains now,
// evaluated in the slot's own timezone. It returns an error when no slot
// matches, which shouldn't happen unless someone edits the status directly.
func findSlotInReplicasRecommendation(recommendations []v1beta3.ReplicasRecommendation, now time.Time) (int, error) {
	for i, slot := range recommendations {
		// An invalid timezone is ignored; now then keeps the location set by a
		// previous iteration (intentionally the same as the original behavior).
		if tz, err := time.LoadLocation(slot.TimeZone); err == nil {
			now = now.In(tz)
		}
		matchesWeekday := slot.WeekDay == nil || now.Weekday().String() == *slot.WeekDay
		if now.Hour() >= slot.From && now.Hour() < slot.To && matchesWeekday {
			return i, nil
		}
	}
	return -1, errors.New("no recommendation slot")
}
// updateReplicasRecommendation writes value into the time slot matching now,
// keeping the existing slot value when it is higher. A slot that has not been
// updated for 23h+ is first decayed by 5% so a fresh (possibly lower)
// observation can eventually replace a stale peak. Values below
// s.minimumMinReplicas are replaced by floor. The slice is mutated in place
// and returned.
func (s *Service) updateReplicasRecommendation(value int32, recommendations []v1beta3.ReplicasRecommendation, now time.Time, floor int32) ([]v1beta3.ReplicasRecommendation, error) {
	// Locate the slot covering the current time.
	index, err := findSlotInReplicasRecommendation(recommendations, now)
	if err != nil {
		return recommendations, err
	}
	if value < s.minimumMinReplicas {
		value = floor
	}
	slot := &recommendations[index]
	timeBiased := slot.Value
	if now.Sub(slot.UpdatedAt.Time).Hours() >= 23 {
		// Stale for ~a day: decay the stored value by 5% so the current
		// observation can win over an old peak.
		timeBiased = int32(math.Trunc(float64(slot.Value) * 0.95))
	}
	if value > timeBiased {
		slot.Value = value
	} else {
		slot.Value = timeBiased
	}
	slot.UpdatedAt = metav1.NewTime(now)
	return recommendations, nil
}
// updateHPATargetUtilizationRecommendations recalculates the recommended HPA
// target utilization for every Horizontal-policy resource and stores the
// result in tortoise.Status.Recommendations.Horizontal.TargetUtilizations.
// The recommendation is derived from the gap between the VPA-observed upper
// usage and the current HPA target, then clamped to
// [minimumTargetResourceUtilization, maximumTargetResourceUtilization].
func (s *Service) updateHPATargetUtilizationRecommendations(ctx context.Context, tortoise *v1beta3.Tortoise, hpa *v2.HorizontalPodAutoscaler, replicaNum int32) (*v1beta3.Tortoise, error) {
	logger := log.FromContext(ctx)
	if replicaNum == s.maximumMaxReplica {
		// We skip generating HPA recommendations if the current replica number is equal to the maximumMaxReplica
		// because HPA recommendation would be not valid in this case
		// and, either way, editing HPA would not change any situation because the replica number is already at the maximum.
		//
		// This situation should be rare because the replica number shouldn't reach the maximumMaxReplica in normal situation.
		logger.Error(nil, "The recommendation of HPA is not updated because the current replica number is equal to the maximumMaxReplica", "current replica number", replicaNum, "maximumMaxReplica", s.maximumMaxReplica)
		// We still update VPA recommendations because VPA recommendations are not affected by the replica number
		// and hopefully making the container bigger would help the situation.
		return tortoise, nil
	}
	// requestMap: containerName -> resourceName -> current resource request.
	requestMap := map[string]map[corev1.ResourceName]resource.Quantity{}
	for _, r := range tortoise.Status.Conditions.ContainerResourceRequests {
		requestMap[r.ContainerName] = map[corev1.ResourceName]resource.Quantity{}
		for resourcename, value := range r.Resource {
			requestMap[r.ContainerName][resourcename] = value
		}
	}
	// recommendationMap: containerName -> resourceName -> max recommendation observed from VPA.
	recommendationMap := map[string]map[corev1.ResourceName]resource.Quantity{}
	for _, perContainer := range tortoise.Status.Conditions.ContainerRecommendationFromVPA {
		recommendationMap[perContainer.ContainerName] = map[corev1.ResourceName]resource.Quantity{}
		for k, perResource := range perContainer.MaxRecommendation {
			recommendationMap[perContainer.ContainerName][k] = perResource.Quantity
		}
	}
	newHPATargetUtilizationRecommendationPerContainer := []v1beta3.HPATargetUtilizationRecommendationPerContainer{}
	for _, r := range tortoise.Status.AutoscalingPolicy {
		recommendedTargetUtilization := map[corev1.ResourceName]int32{}
		reqmap, ok := requestMap[r.ContainerName]
		if !ok {
			logger.Error(nil, fmt.Sprintf("no resource request on the container %s", r.ContainerName))
			continue
		}
		for k, p := range r.Policy {
			if p != v1beta3.AutoscalingTypeHorizontal {
				// nothing to do.
				continue
			}
			req, ok := reqmap[k]
			if !ok {
				logger.Error(nil, fmt.Sprintf("no %s request on the container %s", k, r.ContainerName))
				continue
			}
			currentTargetValue, err := hpaservice.GetHPATargetValue(ctx, hpa, r.ContainerName, k)
			if err != nil {
				return tortoise, fmt.Errorf("try to find the metric for the conainter which is configured to be scale by Horizontal: %w", err)
			}
			recomMap, ok := recommendationMap[r.ContainerName]
			if !ok {
				return tortoise, fmt.Errorf("no resource recommendation from VPA for the container %s", r.ContainerName)
			}
			recom, ok := recomMap[k]
			if !ok {
				return tortoise, fmt.Errorf("no %s recommendation from VPA for the container %s", k, r.ContainerName)
			}
			// upperUsage is the utilization (%) the VPA-recommended usage would
			// represent against the current request, rounded up.
			upperUsage := math.Ceil((float64(recom.MilliValue()) / float64(req.MilliValue())) * 100)
			reason := ""
			if currentTargetValue > int32(upperUsage) {
				// upperUsage is less than targetValue.
				// This case, there're some scenarios:
				// - the container size is unbalanced. (one resource is very bigger than its consumption)
				// - hitting minReplicas.
				//
				// And this case, rather than changing the target value, we'd like to change the container size.
				recommendedTargetUtilization[k] = currentTargetValue // no change (except the current value exceeds maximumTargetResourceUtilization)
				reason = "the current resource utilization is too small and it's due to unbalanced container size or minReplicas, so keep the current target utilization"
			} else {
				newRecom := updateRecommendedContainerBasedMetric(int32(upperUsage), currentTargetValue)
				if newRecom <= 0 || newRecom > 100 {
					logger.Error(nil, "generated recommended HPA target utilization is invalid, fallback to the current target value", "current target utilization", currentTargetValue, "recommended target utilization", newRecom, "upper usage", upperUsage, "container name", r.ContainerName, "resource name", k)
					newRecom = currentTargetValue
					reason = "the generated recommended HPA target utilization is invalid, fallback to the current target value"
				} else {
					reason = "generated recommendation is valid"
				}
				recommendedTargetUtilization[k] = newRecom
			}
			// Clamp to the configured bounds (applies to both branches above).
			if recommendedTargetUtilization[k] > s.maximumTargetResourceUtilization {
				reason = "the generated recommended HPA target utilization is too high, fallback to the upper target utilization"
				recommendedTargetUtilization[k] = s.maximumTargetResourceUtilization
			}
			if recommendedTargetUtilization[k] < s.minimumTargetResourceUtilization {
				reason = "the generated recommended HPA target utilization is too low, fallback to the lower target utilization"
				recommendedTargetUtilization[k] = s.minimumTargetResourceUtilization
			}
			// Emit an event only when the recommendation differs from the current HPA target.
			if currentTargetValue != recommendedTargetUtilization[k] {
				s.eventRecorder.Event(tortoise, corev1.EventTypeNormal, event.RecommendationUpdated, fmt.Sprintf("The recommendation of HPA %v target utilization (%v) in Tortoise status is updated (%v%% → %v%%)", k, r.ContainerName, currentTargetValue, recommendedTargetUtilization[k]))
			} else {
				logger.Info("The recommendation of the container is not updated", "container name", r.ContainerName, "resource name", k, "reason", fmt.Sprintf("HPA target utilization %v%% → %v%%", currentTargetValue, recommendedTargetUtilization[k]))
			}
			logger.Info("HPA target utilization recommendation is generated", "current target utilization", currentTargetValue, "recommended target utilization", recommendedTargetUtilization[k], "upper usage", upperUsage, "container name", r.ContainerName, "resource name", k, "reason", reason)
		}
		newHPATargetUtilizationRecommendationPerContainer = append(newHPATargetUtilizationRecommendationPerContainer, v1beta3.HPATargetUtilizationRecommendationPerContainer{
			ContainerName:     r.ContainerName,
			TargetUtilization: recommendedTargetUtilization,
		})
	}
	tortoise.Status.Recommendations.Horizontal.TargetUtilizations = newHPATargetUtilizationRecommendationPerContainer
	return tortoise, nil
}
// updateRecommendedContainerBasedMetric derives a new HPA target utilization
// from the observed upper usage and the current target: it keeps the headroom
// (upperUsage - currentTarget) as a safety margin below 100%, i.e. returns
// 100 - (upperUsage - currentTarget). Callers validate the (0, 100] range.
func updateRecommendedContainerBasedMetric(upperUsage, currentTarget int32) int32 {
	headroom := upperUsage - currentTarget
	return 100 - headroom
}