From c8e250e4ebc0130e001cdd5f196af26ad209ffe7 Mon Sep 17 00:00:00 2001
From: Em Sharnoff
Date: Mon, 20 May 2024 18:20:47 -0700
Subject: [PATCH] tmp: Don't include buffer in Reserve()

---
 pkg/plugin/plugin.go |  9 ++++++++-
 pkg/plugin/state.go  | 29 ++++++++++++++++++++++-------
 2 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/pkg/plugin/plugin.go b/pkg/plugin/plugin.go
index d69f18d5d..8dc8eb749 100644
--- a/pkg/plugin/plugin.go
+++ b/pkg/plugin/plugin.go
@@ -817,7 +817,14 @@ func (e *AutoscaleEnforcer) Reserve(
 		return status
 	}
 
-	ok, verdict, err := e.reserveResources(ctx, logger, pod, "Reserve", false)
+	ok, verdict, err := e.reserveResources(ctx, logger, pod, "Reserve", reserveOptions{
+		// we *could* deny, but that's ultimately less reliable.
+		// For more, see https://github.com/neondatabase/autoscaling/issues/869
+		allowDeny: false,
+		// don't include buffer because we know that future changes by the autoscaler-agent must go
+		// through us.
+		includeBuffer: false,
+	})
 	if err != nil {
 		return framework.NewStatus(framework.UnschedulableAndUnresolvable, err.Error())
 	}
diff --git a/pkg/plugin/state.go b/pkg/plugin/state.go
index 5282c13aa..9f8db29dd 100644
--- a/pkg/plugin/state.go
+++ b/pkg/plugin/state.go
@@ -585,7 +585,18 @@ func (e *AutoscaleEnforcer) handleStarted(logger *zap.Logger, pod *corev1.Pod) {
 
 	logger.Info("Handling Pod start event")
 
-	_, _, _ = e.reserveResources(context.TODO(), logger, pod, "Pod started", false)
+	_, _, _ = e.reserveResources(context.TODO(), logger, pod, "Pod started", reserveOptions{
+		// pod already started, out of our control - we don't have a mechanism to deny it
+		allowDeny: false,
+		// this may be a preexisting VM. If so, we should include it in "buffer" as long as it's
+		// supposed to be handled by us (otherwise, the "buffer" will never be resolved)
+		includeBuffer: pod.Spec.SchedulerName == e.state.conf.SchedulerName,
+	})
+}
+
+type reserveOptions struct {
+	allowDeny     bool
+	includeBuffer bool
 }
 
 // reserveResources attempts to set aside resources on the node for the pod.
@@ -601,7 +612,7 @@ func (e *AutoscaleEnforcer) reserveResources(
 	logger *zap.Logger,
 	pod *corev1.Pod,
 	action string,
-	allowDeny bool,
+	opts reserveOptions,
 ) (ok bool, _ *verdictSet, _ error) {
 	nodeName := pod.Spec.NodeName
 
@@ -646,7 +657,7 @@ func (e *AutoscaleEnforcer) reserveResources(
 		e.metrics.IncReserveShouldDeny(pod, node)
 	}
-	if shouldDeny && allowDeny {
+	if shouldDeny && opts.allowDeny {
 		cpuShortVerdict := "NOT ENOUGH"
 		if add.VCPU <= node.remainingReservableCPU() {
 			cpuShortVerdict = "OK"
 		}
@@ -703,10 +714,14 @@ func (e *AutoscaleEnforcer) reserveResources(
 			Max: vmInfo.Max().Mem,
 		}
 
-		// If scaling isn't enabled *or* the pod is involved in an ongoing migration, then we can be
-		// more precise about usage (because scaling is forbidden while migrating).
+		// If scaling isn't enabled *or* the pod is involved in an ongoing migration *or* the caller
+		// has opted out of setting Buffer, then we can be more precise about usage.
+		//
+		// Buffer exists to handle scaling that may happen due to a prior scheduler's approval.
+		// If scaling is disabled, we don't have to worry about this, and if there's an ongoing
+		// migration, scaling is forbidden.
 		migrating := util.TryPodOwnerVirtualMachineMigration(pod) != nil
-		if !vmInfo.Config.ScalingEnabled || migrating {
+		if !vmInfo.Config.ScalingEnabled || migrating || !opts.includeBuffer {
 			cpuState.Buffer = 0
 			cpuState.Reserved = vmInfo.Using().VCPU
 			memState.Buffer = 0
@@ -769,7 +784,7 @@ func (e *AutoscaleEnforcer) reserveResources(
 		}
 	}
 
-	if allowDeny {
+	if opts.allowDeny {
 		logger.Info("Allowing reserve resources for Pod", zap.Object("verdict", verdict))
 	} else if shouldDeny /* but couldn't */ {
 		logger.Warn("Reserved resources for Pod above totals", zap.Object("verdict", verdict))
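Note (not part of the patch): the behavior change hinges on how the new includeBuffer option interacts
with the existing scaling-disabled and migration checks. The self-contained Go sketch below illustrates
that branch in isolation; the vmResources type and reserveAmounts helper are hypothetical simplifications
of the plugin's vmInfo and podResourceState, assuming the default reservation is the VM's max with the
unused portion recorded as Buffer, as the hunk at @@ -703,10 +714,14 @@ implies.

package main

import "fmt"

// Simplified stand-in for the plugin's vmInfo; the field names here are
// illustrative, not the real types from pkg/plugin.
type vmResources struct {
	Using, Max     uint32 // e.g. milli-CPU
	ScalingEnabled bool
}

// reserveAmounts mirrors the branch changed in this patch: when scaling is
// disabled, the VM is migrating, or the caller opted out of buffer
// (includeBuffer == false), reserve exactly what the VM is using and set no
// buffer. Otherwise, reserve up to Max and record the unused portion as
// buffer that a prior scheduler may already have approved.
func reserveAmounts(vm vmResources, migrating, includeBuffer bool) (reserved, buffer uint32) {
	if !vm.ScalingEnabled || migrating || !includeBuffer {
		return vm.Using, 0
	}
	return vm.Max, vm.Max - vm.Using
}

func main() {
	vm := vmResources{Using: 1000, Max: 4000, ScalingEnabled: true}

	// Reserve() path after this patch: includeBuffer == false.
	r, b := reserveAmounts(vm, false, false)
	fmt.Printf("Reserve():   reserved=%d buffer=%d\n", r, b) // reserved=1000 buffer=0

	// handleStarted() path for a pod owned by this scheduler: includeBuffer == true.
	r, b = reserveAmounts(vm, false, true)
	fmt.Printf("Pod started: reserved=%d buffer=%d\n", r, b) // reserved=4000 buffer=3000
}

Under these assumptions, the Reserve() path now reserves only current usage with zero buffer, while
handleStarted() still reserves up to the VM's max for pods handled by this scheduler.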