// errSpec is the sentinel that marks an error as originating from the spec of
// the HPA object rather than from the controller itself. reconcileAutoscaler
// checks for it with errors.Is to tell spec errors apart from internal errors,
// so every spec-related error must carry it as its root error.
//
// Its message is intentionally empty: appending it with %w directly after the
// text, e.g. fmt.Errorf("invalid spec%w", errSpec), leaves the rendered error
// message unchanged while still making errors.Is(err, errSpec) report true.
var errSpec = errors.New("")
hpaLister autoscalinglisters.HorizontalPodAutoscalerLister @@ -139,6 +150,7 @@ func NewHorizontalController( scaleNamespacer: scaleNamespacer, hpaNamespacer: hpaNamespacer, downscaleStabilisationWindow: downscaleStabilisationWindow, + monitor: monitor.New(), queue: workqueue.NewNamedRateLimitingQueue(NewDefaultHPARateLimiter(resyncPeriod), "horizontalpodautoscaler"), mapper: mapper, recommendations: map[string][]timestampedRecommendation{}, @@ -174,6 +186,8 @@ func NewHorizontalController( ) hpaController.replicaCalc = replicaCalc + monitor.Register() + return hpaController } @@ -276,14 +290,17 @@ func (a *HorizontalController) processNextWorkItem(ctx context.Context) bool { } // computeReplicasForMetrics computes the desired number of replicas for the metric specifications listed in the HPA, -// returning the maximum of the computed replica counts, a description of the associated metric, and the statuses of +// returning the maximum of the computed replica counts, a description of the associated metric, and the statuses of // all metrics computed. +// It may return both valid metricDesiredReplicas and an error, +// when some metrics still work and HPA should perform scaling based on them. +// If HPA cannot do anything due to error, it returns -1 in metricDesiredReplicas as a failure signal. 
func (a *HorizontalController) computeReplicasForMetrics(ctx context.Context, hpa *autoscalingv2.HorizontalPodAutoscaler, scale *autoscalingv1.Scale, metricSpecs []autoscalingv2.MetricSpec) (replicas int32, metric string, statuses []autoscalingv2.MetricStatus, timestamp time.Time, err error) { selector, err := a.validateAndParseSelector(hpa, scale.Status.Selector) if err != nil { - return 0, "", nil, time.Time{}, err + return -1, "", nil, time.Time{}, err } specReplicas := scale.Spec.Replicas @@ -303,23 +320,29 @@ func (a *HorizontalController) computeReplicasForMetrics(ctx context.Context, hp invalidMetricError = err } invalidMetricsCount++ + continue } - if err == nil && (replicas == 0 || replicaCountProposal > replicas) { + if replicas == 0 || replicaCountProposal > replicas { timestamp = timestampProposal replicas = replicaCountProposal metric = metricNameProposal } } + if invalidMetricError != nil { + invalidMetricError = fmt.Errorf("invalid metrics (%v invalid out of %v), first error is: %v", invalidMetricsCount, len(metricSpecs), invalidMetricError) + } + // If all metrics are invalid or some are invalid and we would scale down, // return an error and set the condition of the hpa based on the first invalid metric. 
// Otherwise set the condition as scaling active as we're going to scale if invalidMetricsCount >= len(metricSpecs) || (invalidMetricsCount > 0 && replicas < specReplicas) { setCondition(hpa, invalidMetricCondition.Type, invalidMetricCondition.Status, invalidMetricCondition.Reason, invalidMetricCondition.Message) - return 0, "", statuses, time.Time{}, fmt.Errorf("invalid metrics (%v invalid out of %v), first error is: %v", invalidMetricsCount, len(metricSpecs), invalidMetricError) + return -1, "", statuses, time.Time{}, invalidMetricError } setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionTrue, "ValidMetricFound", "the HPA was able to successfully calculate a replica count from %s", metric) - return replicas, metric, statuses, timestamp, nil + + return replicas, metric, statuses, timestamp, invalidMetricError } // hpasControllingPodsUnderSelector returns a list of keys of all HPAs that control a given list of pods. @@ -445,8 +468,8 @@ func (a *HorizontalController) computeReplicasForMetric(ctx context.Context, hpa return 0, "", time.Time{}, condition, fmt.Errorf("failed to get %s external metric value: %v", spec.External.Metric.Name, err) } default: - errMsg := fmt.Sprintf("unknown metric source type %q", string(spec.Type)) - err = fmt.Errorf(errMsg) + // It shouldn't reach here as invalid metric source type is filtered out in the api-server's validation. 
+ err = fmt.Errorf("unknown metric source type %q%w", string(spec.Type), errSpec) condition := a.getUnableComputeReplicaCountCondition(hpa, "InvalidMetricSourceType", err) return 0, "", time.Time{}, condition, err } @@ -462,7 +485,7 @@ func (a *HorizontalController) reconcileKey(ctx context.Context, key string) (de logger := klog.FromContext(ctx) hpa, err := a.hpaLister.HorizontalPodAutoscalers(namespace).Get(name) - if errors.IsNotFound(err) { + if k8serrors.IsNotFound(err) { logger.Info("Horizontal Pod Autoscaler has been deleted", "HPA", klog.KRef(namespace, name)) a.recommendationsLock.Lock() @@ -691,7 +714,23 @@ func (a *HorizontalController) recordInitialRecommendation(currentReplicas int32 } } -func (a *HorizontalController) reconcileAutoscaler(ctx context.Context, hpaShared *autoscalingv2.HorizontalPodAutoscaler, key string) error { +func (a *HorizontalController) reconcileAutoscaler(ctx context.Context, hpaShared *autoscalingv2.HorizontalPodAutoscaler, key string) (retErr error) { + // actionLabel is used to report which actions this reconciliation has taken. + actionLabel := monitor.ActionLabelNone + start := time.Now() + defer func() { + errorLabel := monitor.ErrorLabelNone + if retErr != nil { + // In case of error, set "internal" as default. 
+ errorLabel = monitor.ErrorLabelInternal + } + if errors.Is(retErr, errSpec) { + errorLabel = monitor.ErrorLabelSpec + } + + a.monitor.ObserveReconciliationResult(actionLabel, errorLabel, time.Since(start)) + }() + // make a copy so that we never mutate the shared informer cache (conversion can mutate the object) hpa := hpaShared.DeepCopy() hpaStatusOriginal := hpa.Status.DeepCopy() @@ -705,7 +744,7 @@ func (a *HorizontalController) reconcileAutoscaler(ctx context.Context, hpaShare if err := a.updateStatusIfNeeded(ctx, hpaStatusOriginal, hpa); err != nil { utilruntime.HandleError(err) } - return fmt.Errorf("invalid API version in scale target reference: %v", err) + return fmt.Errorf("invalid API version in scale target reference: %v%w", err, errSpec) } targetGK := schema.GroupKind{ @@ -771,7 +810,9 @@ func (a *HorizontalController) reconcileAutoscaler(ctx context.Context, hpaShare } else { var metricTimestamp time.Time metricDesiredReplicas, metricName, metricStatuses, metricTimestamp, err = a.computeReplicasForMetrics(ctx, hpa, scale, hpa.Spec.Metrics) - if err != nil { + // computeReplicasForMetrics may return both non-zero metricDesiredReplicas and an error. + // That means some metrics still work and HPA should perform scaling based on them. + if err != nil && metricDesiredReplicas == -1 { a.setCurrentReplicasInStatus(hpa, currentReplicas) if err := a.updateStatusIfNeeded(ctx, hpaStatusOriginal, hpa); err != nil { utilruntime.HandleError(err) @@ -779,6 +820,10 @@ func (a *HorizontalController) reconcileAutoscaler(ctx context.Context, hpaShare a.eventRecorder.Event(hpa, v1.EventTypeWarning, "FailedComputeMetricsReplicas", err.Error()) return fmt.Errorf("failed to compute desired number of replicas based on listed metrics for %s: %v", reference, err) } + if err != nil { + // We proceed to scaling, but return this error from reconcileAutoscaler() finally. 
+ retErr = err + } logger.V(4).Info("Proposing desired replicas", "desiredReplicas", metricDesiredReplicas, @@ -825,6 +870,12 @@ func (a *HorizontalController) reconcileAutoscaler(ctx context.Context, hpaShare "currentReplicas", currentReplicas, "desiredReplicas", desiredReplicas, "reason", rescaleReason) + + if desiredReplicas > currentReplicas { + actionLabel = monitor.ActionLabelScaleUp + } else { + actionLabel = monitor.ActionLabelScaleDown + } } else { logger.V(4).Info("Decided not to scale", "scaleTarget", reference, @@ -834,7 +885,14 @@ func (a *HorizontalController) reconcileAutoscaler(ctx context.Context, hpaShare } a.setStatus(hpa, currentReplicas, desiredReplicas, metricStatuses, rescale) - return a.updateStatusIfNeeded(ctx, hpaStatusOriginal, hpa) + + err = a.updateStatusIfNeeded(ctx, hpaStatusOriginal, hpa) + if err != nil { + // we can overwrite retErr in this case because it's an internal error. + return err + } + + return retErr } // stabilizeRecommendation: diff --git a/pkg/controller/podautoscaler/horizontal_test.go b/pkg/controller/podautoscaler/horizontal_test.go index b079251fcea9e..2c7342c38ea32 100644 --- a/pkg/controller/podautoscaler/horizontal_test.go +++ b/pkg/controller/podautoscaler/horizontal_test.go @@ -34,6 +34,7 @@ import ( "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/apimachinery/pkg/watch" "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes/fake" @@ -43,6 +44,7 @@ import ( autoscalingapiv2 "k8s.io/kubernetes/pkg/apis/autoscaling/v2" "k8s.io/kubernetes/pkg/controller" "k8s.io/kubernetes/pkg/controller/podautoscaler/metrics" + "k8s.io/kubernetes/pkg/controller/podautoscaler/monitor" "k8s.io/kubernetes/pkg/controller/util/selectors" cmapi "k8s.io/metrics/pkg/apis/custom_metrics/v1beta2" emapi "k8s.io/metrics/pkg/apis/external_metrics/v1beta1" @@ -133,6 +135,10 @@ type testCase struct { // Channel with names of HPA 
objects which we have reconciled. processed chan string + // expected results reported to the mock monitor at first. + expectedReportedReconciliationActionLabel monitor.ActionLabel + expectedReportedReconciliationErrorLabel monitor.ErrorLabel + // Target resource information. resource *fakeResource @@ -667,7 +673,7 @@ func findCpuUtilization(metricStatus []autoscalingv2.MetricStatus) (utilization return nil } -func (tc *testCase) verifyResults(t *testing.T) { +func (tc *testCase) verifyResults(t *testing.T, m *mockMonitor) { tc.Lock() defer tc.Unlock() @@ -676,6 +682,10 @@ func (tc *testCase) verifyResults(t *testing.T) { if tc.verifyEvents { assert.Equal(t, tc.specReplicas != tc.expectedDesiredReplicas, tc.eventCreated, "an event should have been created only if we expected a change in replicas") } + + m.waitUntilRecorded(t) + assert.Equal(t, tc.expectedReportedReconciliationActionLabel, m.reconciliationActionLabels[0], "the reconciliation action should be recorded in monitor expectedly") + assert.Equal(t, tc.expectedReportedReconciliationErrorLabel, m.reconciliationErrorLabels[0], "the reconciliation error should be recorded in monitor expectedly") } func (tc *testCase) setupController(t *testing.T) (*HorizontalController, informers.SharedInformerFactory) { @@ -750,6 +760,8 @@ func (tc *testCase) setupController(t *testing.T) (*HorizontalController, inform hpaController.hpaSelectors = tc.hpaSelectors } + hpaController.monitor = &mockMonitor{} + return hpaController, informerFactory } @@ -787,7 +799,11 @@ func (tc *testCase) runTestWithController(t *testing.T, hpaController *Horizonta // Wait for HPA to be processed. 
<-tc.processed } - tc.verifyResults(t) + m, ok := hpaController.monitor.(*mockMonitor) + if !ok { + t.Fatalf("test HPA controller should have mockMonitor, but actually not") + } + tc.verifyResults(t, m) } func (tc *testCase) runTest(t *testing.T) { @@ -795,6 +811,35 @@ func (tc *testCase) runTest(t *testing.T) { tc.runTestWithController(t, hpaController, informerFactory) } +// mockMonitor implements monitor.Monitor interface. +// It records which results are observed in slices. +type mockMonitor struct { + sync.RWMutex + reconciliationActionLabels []monitor.ActionLabel + reconciliationErrorLabels []monitor.ErrorLabel +} + +func (m *mockMonitor) ObserveReconciliationResult(action monitor.ActionLabel, err monitor.ErrorLabel, _ time.Duration) { + m.Lock() + defer m.Unlock() + m.reconciliationActionLabels = append(m.reconciliationActionLabels, action) + m.reconciliationErrorLabels = append(m.reconciliationErrorLabels, err) +} + +// waitUntilRecorded waits for the HPA controller to reconcile at least once. 
+func (m *mockMonitor) waitUntilRecorded(t *testing.T) { + if err := wait.Poll(20*time.Millisecond, 100*time.Millisecond, func() (done bool, err error) { + m.RWMutex.RLock() + defer m.RWMutex.RUnlock() + if len(m.reconciliationActionLabels) == 0 || len(m.reconciliationErrorLabels) == 0 { + return false, nil + } + return true, nil + }); err != nil { + t.Fatalf("no reconciliation is recorded in the monitor, len(monitor.reconciliationActionLabels)=%v len(monitor.reconciliationErrorLabels)=%v ", len(m.reconciliationActionLabels), len(m.reconciliationErrorLabels)) + } +} + func TestScaleUp(t *testing.T) { tc := testCase{ minReplicas: 2, @@ -807,6 +852,8 @@ func TestScaleUp(t *testing.T) { reportedLevels: []uint64{300, 500, 700}, reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, useMetricsAPI: true, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -829,10 +876,12 @@ func TestScaleUpContainer(t *testing.T) { Container: "container1", }, }}, - reportedLevels: []uint64{300, 500, 700}, - reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, - useMetricsAPI: true, - containerResourceMetricsEnabled: true, + reportedLevels: []uint64{300, 500, 700}, + reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + useMetricsAPI: true, + containerResourceMetricsEnabled: true, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -862,6 +911,8 @@ func TestContainerMetricWithTheFeatureGateDisabled(t *testing.T) { }}, reportedLevels: []uint64{300, 400, 500}, reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), 
resource.MustParse("1.0")}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelInternal, } tc.runTest(t) @@ -881,6 +932,8 @@ func TestScaleUpUnreadyLessScale(t *testing.T) { reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, reportedPodReadiness: []v1.ConditionStatus{v1.ConditionFalse, v1.ConditionTrue, v1.ConditionTrue}, useMetricsAPI: true, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -899,6 +952,8 @@ func TestScaleUpHotCpuLessScale(t *testing.T) { reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, reportedPodStartTime: []metav1.Time{hotCPUCreationTime(), coolCPUCreationTime(), coolCPUCreationTime()}, useMetricsAPI: true, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -922,6 +977,8 @@ func TestScaleUpUnreadyNoScale(t *testing.T) { Status: v1.ConditionTrue, Reason: "ReadyForNewScale", }), + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -946,6 +1003,8 @@ func TestScaleUpHotCpuNoScale(t *testing.T) { Status: v1.ConditionTrue, Reason: "ReadyForNewScale", }), + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -965,6 +1024,8 @@ func TestScaleUpIgnoresFailedPods(t *testing.T) { reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse}, reportedPodPhase: []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodFailed, v1.PodFailed}, useMetricsAPI: true, 
+ expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -985,6 +1046,8 @@ func TestScaleUpIgnoresDeletionPods(t *testing.T) { reportedPodPhase: []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning}, reportedPodDeletionTimestamp: []bool{false, false, true, true}, useMetricsAPI: true, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1006,6 +1069,8 @@ func TestScaleUpDeployment(t *testing.T) { apiVersion: "apps/v1", kind: "Deployment", }, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1027,6 +1092,8 @@ func TestScaleUpReplicaSet(t *testing.T) { apiVersion: "apps/v1", kind: "ReplicaSet", }, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1056,6 +1123,8 @@ func TestScaleUpCM(t *testing.T) { }, reportedLevels: []uint64{20000, 10000, 30000}, reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1083,10 +1152,12 @@ func TestScaleUpCMUnreadyAndHotCpuNoLessScale(t *testing.T) { }, }, }, - reportedLevels: []uint64{50000, 10000, 30000}, - reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse}, - reportedPodStartTime: []metav1.Time{coolCPUCreationTime(), coolCPUCreationTime(), hotCPUCreationTime()}, - reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + reportedLevels: 
[]uint64{50000, 10000, 30000}, + reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse}, + reportedPodStartTime: []metav1.Time{coolCPUCreationTime(), coolCPUCreationTime(), hotCPUCreationTime()}, + reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1127,6 +1198,8 @@ func TestScaleUpCMUnreadyandCpuHot(t *testing.T) { Status: v1.ConditionTrue, Reason: "TooManyReplicas", }), + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1166,6 +1239,8 @@ func TestScaleUpHotCpuNoScaleWouldScaleDown(t *testing.T) { Status: v1.ConditionTrue, Reason: "TooManyReplicas", }), + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1199,6 +1274,8 @@ func TestScaleUpCMObject(t *testing.T) { }, }, reportedLevels: []uint64{20000}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1232,6 +1309,8 @@ func TestScaleUpFromZeroCMObject(t *testing.T) { }, }, reportedLevels: []uint64{20000}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1265,6 +1344,8 @@ func TestScaleUpFromZeroIgnoresToleranceCMObject(t *testing.T) { }, }, reportedLevels: []uint64{1000}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1298,6 +1379,8 @@ func TestScaleUpPerPodCMObject(t *testing.T) { }, }, 
reportedLevels: []uint64{40000}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1325,6 +1408,8 @@ func TestScaleUpCMExternal(t *testing.T) { }, }, reportedLevels: []uint64{8600}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1352,6 +1437,8 @@ func TestScaleUpPerPodCMExternal(t *testing.T) { }, }, reportedLevels: []uint64{8600}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1369,6 +1456,8 @@ func TestScaleDown(t *testing.T) { reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, useMetricsAPI: true, recommendations: []timestampedRecommendation{}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleDown, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1393,9 +1482,11 @@ func TestScaleDownContainerResource(t *testing.T) { }, }, }}, - useMetricsAPI: true, - recommendations: []timestampedRecommendation{}, - containerResourceMetricsEnabled: true, + useMetricsAPI: true, + containerResourceMetricsEnabled: true, + recommendations: []timestampedRecommendation{}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleDown, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1414,6 +1505,8 @@ func TestScaleDownWithScalingRules(t *testing.T) { reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, useMetricsAPI: true, recommendations: []timestampedRecommendation{}, + 
expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleDown, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1434,6 +1527,8 @@ func TestScaleUpOneMetricInvalid(t *testing.T) { }, reportedLevels: []uint64{300, 400, 500}, reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelInternal, } tc.runTest(t) } @@ -1455,6 +1550,8 @@ func TestScaleUpFromZeroOneMetricInvalid(t *testing.T) { reportedLevels: []uint64{300, 400, 500}, reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, recommendations: []timestampedRecommendation{}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelInternal, } tc.runTest(t) } @@ -1478,6 +1575,8 @@ func TestScaleUpBothMetricsEmpty(t *testing.T) { // Switch to missing {Type: autoscalingv2.AbleToScale, Status: v1.ConditionTrue, Reason: "SucceededGetScale"}, {Type: autoscalingv2.ScalingActive, Status: v1.ConditionFalse, Reason: "InvalidMetricSourceType"}, }, + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelInternal, } tc.runTest(t) } @@ -1504,6 +1603,8 @@ func TestScaleDownStabilizeInitialSize(t *testing.T) { Status: v1.ConditionTrue, Reason: "ScaleDownStabilized", }), + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1534,6 +1635,8 @@ func TestScaleDownCM(t *testing.T) { reportedLevels: []uint64{12000, 12000, 12000, 12000, 12000}, reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), 
resource.MustParse("1.0"), resource.MustParse("1.0")}, recommendations: []timestampedRecommendation{}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleDown, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1569,6 +1672,8 @@ func TestScaleDownCMObject(t *testing.T) { reportedLevels: []uint64{12000}, reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, recommendations: []timestampedRecommendation{}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleDown, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1604,6 +1709,8 @@ func TestScaleDownToZeroCMObject(t *testing.T) { reportedLevels: []uint64{0}, reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, recommendations: []timestampedRecommendation{}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleDown, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1639,6 +1746,8 @@ func TestScaleDownPerPodCMObject(t *testing.T) { reportedLevels: []uint64{60000}, reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, recommendations: []timestampedRecommendation{}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleDown, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1667,6 +1776,8 @@ func TestScaleDownCMExternal(t *testing.T) { }, reportedLevels: []uint64{8600}, recommendations: []timestampedRecommendation{}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleDown, + expectedReportedReconciliationErrorLabel: 
monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1695,6 +1806,8 @@ func TestScaleDownToZeroCMExternal(t *testing.T) { }, reportedLevels: []uint64{0}, recommendations: []timestampedRecommendation{}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleDown, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1723,6 +1836,8 @@ func TestScaleDownPerPodCMExternal(t *testing.T) { }, reportedLevels: []uint64{8600}, recommendations: []timestampedRecommendation{}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleDown, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1742,6 +1857,8 @@ func TestScaleDownIncludeUnreadyPods(t *testing.T) { useMetricsAPI: true, reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse}, recommendations: []timestampedRecommendation{}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleDown, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1761,6 +1878,8 @@ func TestScaleDownIgnoreHotCpuPods(t *testing.T) { useMetricsAPI: true, reportedPodStartTime: []metav1.Time{coolCPUCreationTime(), coolCPUCreationTime(), coolCPUCreationTime(), hotCPUCreationTime(), hotCPUCreationTime()}, recommendations: []timestampedRecommendation{}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleDown, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1781,6 +1900,8 @@ func TestScaleDownIgnoresFailedPods(t *testing.T) { reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse}, reportedPodPhase: []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodFailed, v1.PodFailed}, recommendations: []timestampedRecommendation{}, + 
expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleDown, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1802,6 +1923,8 @@ func TestScaleDownIgnoresDeletionPods(t *testing.T) { reportedPodPhase: []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning}, reportedPodDeletionTimestamp: []bool{false, false, false, false, false, true, true}, recommendations: []timestampedRecommendation{}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleDown, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1822,6 +1945,8 @@ func TestTolerance(t *testing.T) { Status: v1.ConditionTrue, Reason: "ReadyForNewScale", }), + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1855,6 +1980,8 @@ func TestToleranceCM(t *testing.T) { Status: v1.ConditionTrue, Reason: "ReadyForNewScale", }), + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1893,6 +2020,8 @@ func TestToleranceCMObject(t *testing.T) { Status: v1.ConditionTrue, Reason: "ReadyForNewScale", }), + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1925,6 +2054,8 @@ func TestToleranceCMExternal(t *testing.T) { Status: v1.ConditionTrue, Reason: "ReadyForNewScale", }), + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1962,6 +2093,8 @@ func TestTolerancePerPodCMObject(t *testing.T) { Status: v1.ConditionTrue, Reason: "ReadyForNewScale", }), + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + 
expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -1994,6 +2127,8 @@ func TestTolerancePerPodCMExternal(t *testing.T) { Status: v1.ConditionTrue, Reason: "ReadyForNewScale", }), + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -2015,6 +2150,8 @@ func TestMinReplicas(t *testing.T) { Reason: "TooFewReplicas", }), recommendations: []timestampedRecommendation{}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleDown, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -2036,6 +2173,8 @@ func TestZeroMinReplicasDesiredZero(t *testing.T) { Reason: "DesiredWithinRange", }), recommendations: []timestampedRecommendation{}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleDown, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -2057,6 +2196,8 @@ func TestMinReplicasDesiredZero(t *testing.T) { Reason: "TooFewReplicas", }), recommendations: []timestampedRecommendation{}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleDown, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -2076,6 +2217,8 @@ func TestZeroReplicas(t *testing.T) { {Type: autoscalingv2.AbleToScale, Status: v1.ConditionTrue, Reason: "SucceededGetScale"}, {Type: autoscalingv2.ScalingActive, Status: v1.ConditionFalse, Reason: "ScalingDisabled"}, }, + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -2094,6 +2237,8 @@ func TestTooFewReplicas(t *testing.T) { expectedConditions: []autoscalingv2.HorizontalPodAutoscalerCondition{ {Type: autoscalingv2.AbleToScale, Status: v1.ConditionTrue, Reason: "SucceededRescale"}, }, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + 
expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -2112,6 +2257,8 @@ func TestTooManyReplicas(t *testing.T) { expectedConditions: []autoscalingv2.HorizontalPodAutoscalerCondition{ {Type: autoscalingv2.AbleToScale, Status: v1.ConditionTrue, Reason: "SucceededRescale"}, }, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleDown, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -2132,6 +2279,8 @@ func TestMaxReplicas(t *testing.T) { Status: v1.ConditionTrue, Reason: "TooManyReplicas", }), + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -2152,6 +2301,8 @@ func TestSuperfluousMetrics(t *testing.T) { Status: v1.ConditionTrue, Reason: "TooManyReplicas", }), + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -2168,6 +2319,8 @@ func TestMissingMetrics(t *testing.T) { reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, useMetricsAPI: true, recommendations: []timestampedRecommendation{}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleDown, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -2187,6 +2340,8 @@ func TestEmptyMetrics(t *testing.T) { {Type: autoscalingv2.AbleToScale, Status: v1.ConditionTrue, Reason: "SucceededGetScale"}, {Type: autoscalingv2.ScalingActive, Status: v1.ConditionFalse, Reason: "FailedGetResourceMetric"}, }, + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelInternal, } tc.runTest(t) } @@ -2206,6 +2361,8 @@ func TestEmptyCPURequest(t *testing.T) { {Type: autoscalingv2.AbleToScale, Status: 
v1.ConditionTrue, Reason: "SucceededGetScale"}, {Type: autoscalingv2.ScalingActive, Status: v1.ConditionFalse, Reason: "FailedGetResourceMetric"}, }, + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelInternal, } tc.runTest(t) } @@ -2222,6 +2379,8 @@ func TestEventCreated(t *testing.T) { reportedCPURequests: []resource.Quantity{resource.MustParse("0.2")}, verifyEvents: true, useMetricsAPI: true, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -2243,6 +2402,8 @@ func TestEventNotCreated(t *testing.T) { Status: v1.ConditionTrue, Reason: "ReadyForNewScale", }), + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -2259,6 +2420,8 @@ func TestMissingReports(t *testing.T) { reportedCPURequests: []resource.Quantity{resource.MustParse("0.2")}, useMetricsAPI: true, recommendations: []timestampedRecommendation{}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleDown, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -2281,6 +2444,8 @@ func TestUpscaleCap(t *testing.T) { Status: v1.ConditionTrue, Reason: "ScaleUpLimit", }), + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -2304,6 +2469,8 @@ func TestUpscaleCapGreaterThanMaxReplicas(t *testing.T) { Status: v1.ConditionTrue, Reason: "TooManyReplicas", }), + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -2330,6 +2497,8 @@ func TestMoreReplicasThanSpecNoScale(t *testing.T) { Status: v1.ConditionTrue, Reason: "ReadyForNewScale", }), + 
expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -2357,6 +2526,8 @@ func TestConditionInvalidSelectorMissing(t *testing.T) { Reason: "InvalidSelector", }, }, + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelInternal, } _, _, _, _, testScaleClient := tc.prepareTestClient(t) @@ -2403,6 +2574,8 @@ func TestConditionInvalidSelectorUnparsable(t *testing.T) { Reason: "InvalidSelector", }, }, + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelInternal, } _, _, _, _, testScaleClient := tc.prepareTestClient(t) @@ -2442,6 +2615,8 @@ func TestConditionNoAmbiguousSelectorWhenNoSelectorOverlapBetweenHPAs(t *testing reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, useMetricsAPI: true, hpaSelectors: hpaSelectors, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -2473,6 +2648,8 @@ func TestConditionAmbiguousSelectorWhenFullSelectorOverlapBetweenHPAs(t *testing }, }, hpaSelectors: hpaSelectors, + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelInternal, } tc.runTest(t) } @@ -2504,6 +2681,8 @@ func TestConditionAmbiguousSelectorWhenPartialSelectorOverlapBetweenHPAs(t *test }, }, hpaSelectors: hpaSelectors, + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelInternal, } testClient, _, _, _, _ := tc.prepareTestClient(t) @@ -2599,6 +2778,8 @@ func TestConditionFailedGetMetrics(t *testing.T) { reportedLevels: []uint64{100, 200, 300}, reportedCPURequests: 
[]resource.Quantity{resource.MustParse("0.1"), resource.MustParse("0.1"), resource.MustParse("0.1")}, useMetricsAPI: true, + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelInternal, } _, testMetricsClient, testCMClient, testEMClient, _ := tc.prepareTestClient(t) tc.testMetricsClient = testMetricsClient @@ -2655,6 +2836,8 @@ func TestConditionInvalidSourceType(t *testing.T) { Reason: "InvalidMetricSourceType", }, }, + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelInternal, } tc.runTest(t) } @@ -2677,6 +2860,8 @@ func TestConditionFailedGetScale(t *testing.T) { Reason: "FailedGetScale", }, }, + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelInternal, } _, _, _, _, testScaleClient := tc.prepareTestClient(t) @@ -2705,6 +2890,8 @@ func TestConditionFailedUpdateScale(t *testing.T) { Status: v1.ConditionFalse, Reason: "FailedUpdateScale", }), + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelInternal, } _, _, _, _, testScaleClient := tc.prepareTestClient(t) @@ -2758,6 +2945,8 @@ func TestNoBackoffUpscaleCM(t *testing.T) { Status: v1.ConditionFalse, Reason: "DesiredWithinRange", }), + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -2803,6 +2992,8 @@ func TestNoBackoffUpscaleCMNoBackoffCpu(t *testing.T) { Status: v1.ConditionTrue, Reason: "TooManyReplicas", }), + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -2831,6 +3022,8 @@ func TestStabilizeDownscale(t *testing.T) { {10, time.Now().Add(-10 * time.Minute)}, {4, 
time.Now().Add(-1 * time.Minute)}, }, + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -2892,6 +3085,8 @@ func TestComputedToleranceAlgImplementation(t *testing.T) { }, useMetricsAPI: true, recommendations: []timestampedRecommendation{}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleDown, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc1.runTest(t) @@ -2935,6 +3130,8 @@ func TestComputedToleranceAlgImplementation(t *testing.T) { Status: v1.ConditionTrue, Reason: "ReadyForNewScale", }), + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc2.runTest(t) } @@ -2955,6 +3152,8 @@ func TestScaleUpRCImmediately(t *testing.T) { expectedConditions: []autoscalingv2.HorizontalPodAutoscalerCondition{ {Type: autoscalingv2.AbleToScale, Status: v1.ConditionTrue, Reason: "SucceededRescale"}, }, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -2975,6 +3174,8 @@ func TestScaleDownRCImmediately(t *testing.T) { expectedConditions: []autoscalingv2.HorizontalPodAutoscalerCondition{ {Type: autoscalingv2.AbleToScale, Status: v1.ConditionTrue, Reason: "SucceededRescale"}, }, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleDown, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } tc.runTest(t) } @@ -2996,6 +3197,8 @@ func TestAvoidUnnecessaryUpdates(t *testing.T) { useMetricsAPI: true, lastScaleTime: &now, recommendations: []timestampedRecommendation{}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone, } testClient, _, _, _, _ := tc.prepareTestClient(t) tc.testClient = testClient @@ -4253,6 +4456,8 @@ func 
TestScaleUpOneMetricEmpty(t *testing.T) { }, reportedLevels: []uint64{300, 400, 500}, reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + expectedReportedReconciliationActionLabel: monitor.ActionLabelScaleUp, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelInternal, } _, _, _, testEMClient, _ := tc.prepareTestClient(t) testEMClient.PrependReactor("list", "*", func(action core.Action) (handled bool, ret runtime.Object, err error) { @@ -4283,6 +4488,8 @@ func TestNoScaleDownOneMetricInvalid(t *testing.T) { {Type: autoscalingv2.AbleToScale, Status: v1.ConditionTrue, Reason: "SucceededGetScale"}, {Type: autoscalingv2.ScalingActive, Status: v1.ConditionFalse, Reason: "InvalidMetricSourceType"}, }, + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelInternal, } tc.runTest(t) @@ -4319,6 +4526,8 @@ func TestNoScaleDownOneMetricEmpty(t *testing.T) { {Type: autoscalingv2.AbleToScale, Status: v1.ConditionTrue, Reason: "SucceededGetScale"}, {Type: autoscalingv2.ScalingActive, Status: v1.ConditionFalse, Reason: "FailedGetExternalMetric"}, }, + expectedReportedReconciliationActionLabel: monitor.ActionLabelNone, + expectedReportedReconciliationErrorLabel: monitor.ErrorLabelInternal, } _, _, _, testEMClient, _ := tc.prepareTestClient(t) testEMClient.PrependReactor("list", "*", func(action core.Action) (handled bool, ret runtime.Object, err error) { diff --git a/pkg/controller/podautoscaler/monitor/metrics.go b/pkg/controller/podautoscaler/monitor/metrics.go new file mode 100644 index 0000000000000..c5a35bd6022b3 --- /dev/null +++ b/pkg/controller/podautoscaler/monitor/metrics.go @@ -0,0 +1,71 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package monitor contains metrics which are exposed from the HPA controller. +package monitor + +import ( + "sync" + + "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/legacyregistry" +) + +const ( + // hpaControllerSubsystem - subsystem name used by HPA controller + hpaControllerSubsystem = "horizontal_pod_autoscaler_controller" +) + +var ( + reconciliationsTotal = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: hpaControllerSubsystem, + Name: "reconciliations_total", + Help: "Number of reconciliations of HPA controller. The label 'action' should be either 'scale_down', 'scale_up', or 'none'. Also, the label 'error' should be either 'spec', 'internal', or 'none'. Note that if both spec and internal errors happen during a reconciliation, the first one to occur is reported in `error` label.", + StabilityLevel: metrics.ALPHA, + }, []string{"action", "error"}) + + reconciliationsDuration = metrics.NewHistogramVec( + &metrics.HistogramOpts{ + Subsystem: hpaControllerSubsystem, + Name: "reconciliation_duration_seconds", + Help: "The time(seconds) that the HPA controller takes to reconcile once. The label 'action' should be either 'scale_down', 'scale_up', or 'none'. Also, the label 'error' should be either 'spec', 'internal', or 'none'.
Note that if both spec and internal errors happen during a reconciliation, the first one to occur is reported in `error` label.", + Buckets: metrics.ExponentialBuckets(0.001, 2, 15), + StabilityLevel: metrics.ALPHA, + }, []string{"action", "error"}) + + metricsList = []metrics.Registerable{ + reconciliationsTotal, + reconciliationsDuration, + } +) + +var register sync.Once + +// Register registers all metrics exactly once. +func Register() { + // Register the metrics. + register.Do(func() { + registerMetrics(metricsList...) + }) +} + +// registerMetrics registers a list of metrics. +func registerMetrics(extraMetrics ...metrics.Registerable) { + for _, metric := range extraMetrics { + legacyregistry.MustRegister(metric) + } +} diff --git a/pkg/controller/podautoscaler/monitor/monitor.go b/pkg/controller/podautoscaler/monitor/monitor.go new file mode 100644 index 0000000000000..592ddc382fb34 --- /dev/null +++ b/pkg/controller/podautoscaler/monitor/monitor.go @@ -0,0 +1,51 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package monitor + +import "time" + +type ActionLabel string +type ErrorLabel string + +const ( + ActionLabelScaleUp ActionLabel = "scale_up" + ActionLabelScaleDown ActionLabel = "scale_down" + ActionLabelNone ActionLabel = "none" + + // ErrorLabelSpec represents an error due to an invalid spec of HPA object.
+ ErrorLabelSpec ErrorLabel = "spec" + // ErrorLabelInternal represents an error from an internal computation or communication with other component. + ErrorLabelInternal ErrorLabel = "internal" + ErrorLabelNone ErrorLabel = "none" +) + +// Monitor records some metrics so that people can monitor HPA controller. +type Monitor interface { + ObserveReconciliationResult(action ActionLabel, err ErrorLabel, duration time.Duration) +} + +type monitor struct{} + +func New() Monitor { + return &monitor{} +} + +// ObserveReconciliationResult observes some metrics from a reconciliation result. +func (r *monitor) ObserveReconciliationResult(action ActionLabel, err ErrorLabel, duration time.Duration) { + reconciliationsTotal.WithLabelValues(string(action), string(err)).Inc() + reconciliationsDuration.WithLabelValues(string(action), string(err)).Observe(duration.Seconds()) +}