tmp: Don't include buffer in Reserve()
sharnoff committed May 21, 2024
1 parent 6555a77 commit c8e250e
Showing 2 changed files with 30 additions and 8 deletions.
pkg/plugin/plugin.go (9 changes: 8 additions & 1 deletion)
@@ -817,7 +817,14 @@ func (e *AutoscaleEnforcer) Reserve(
         return status
     }

-    ok, verdict, err := e.reserveResources(ctx, logger, pod, "Reserve", false)
+    ok, verdict, err := e.reserveResources(ctx, logger, pod, "Reserve", reserveOptions{
+        // we *could* deny, but that's ultimately less reliable.
+        // For more, see https://github.com/neondatabase/autoscaling/issues/869
+        allowDeny: false,
+        // don't include buffer because we know that future changes by the autoscaler-agent must go
+        // through us.
+        includeBuffer: false,
+    })
     if err != nil {
         return framework.NewStatus(framework.UnschedulableAndUnresolvable, err.Error())
     }
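For orientation, a minimal sketch of the pattern this commit moves to: replacing the positional bool with a small options struct, so each call site spells out its choices. The types and call sites below are simplified, hypothetical stand-ins, not the plugin's real definitions.

package main

import "fmt"

// reserveOptions mirrors the shape introduced in this commit: an options struct
// in place of a positional bool, so each call site names its choices.
// (Simplified stand-in; not the plugin's actual definition.)
type reserveOptions struct {
    allowDeny     bool
    includeBuffer bool
}

// reserveResources is a stand-in for the real method; it only reports what it was asked to do.
func reserveResources(action string, opts reserveOptions) {
    fmt.Printf("%s: allowDeny=%v includeBuffer=%v\n", action, opts.allowDeny, opts.includeBuffer)
}

func main() {
    // Reserve(): denying is ultimately less reliable, and future changes by the
    // autoscaler-agent must go through the scheduler anyway, so both are false.
    reserveResources("Reserve", reserveOptions{allowDeny: false, includeBuffer: false})

    // Pod-started handling: an already-running pod can't be denied; include buffer
    // only if the pod is supposed to be handled by this scheduler.
    sameScheduler := true // stand-in for pod.Spec.SchedulerName == e.state.conf.SchedulerName
    reserveResources("Pod started", reserveOptions{allowDeny: false, includeBuffer: sameScheduler})
}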
pkg/plugin/state.go (29 changes: 22 additions & 7 deletions)
@@ -585,7 +585,18 @@ func (e *AutoscaleEnforcer) handleStarted(logger *zap.Logger, pod *corev1.Pod) {

     logger.Info("Handling Pod start event")

-    _, _, _ = e.reserveResources(context.TODO(), logger, pod, "Pod started", false)
+    _, _, _ = e.reserveResources(context.TODO(), logger, pod, "Pod started", reserveOptions{
+        // pod already started, out of our control - we don't have a mechanism to deny it
+        allowDeny: false,
+        // this may be a preexisting VM. If so, we should include it in "buffer" as long as it's
+        // supposed to be handled by us (otherwise, the "buffer" will never be resolved)
+        includeBuffer: pod.Spec.SchedulerName == e.state.conf.SchedulerName,
+    })
 }
+
+type reserveOptions struct {
+    allowDeny     bool
+    includeBuffer bool
+}

 // reserveResources attempts to set aside resources on the node for the pod.
@@ -601,7 +612,7 @@ func (e *AutoscaleEnforcer) reserveResources(
     logger *zap.Logger,
     pod *corev1.Pod,
     action string,
-    allowDeny bool,
+    opts reserveOptions,
 ) (ok bool, _ *verdictSet, _ error) {
     nodeName := pod.Spec.NodeName

@@ -646,7 +657,7 @@ func (e *AutoscaleEnforcer) reserveResources(
         e.metrics.IncReserveShouldDeny(pod, node)
     }

-    if shouldDeny && allowDeny {
+    if shouldDeny && opts.allowDeny {
         cpuShortVerdict := "NOT ENOUGH"
         if add.VCPU <= node.remainingReservableCPU() {
             cpuShortVerdict = "OK"
@@ -703,10 +714,14 @@ func (e *AutoscaleEnforcer) reserveResources(
         Max: vmInfo.Max().Mem,
     }

-    // If scaling isn't enabled *or* the pod is involved in an ongoing migration, then we can be
-    // more precise about usage (because scaling is forbidden while migrating).
+    // If scaling isn't enabled *or* the pod is involved in an ongoing migration *or* the caller
+    // has opted out of setting Buffer, then we can be more precise about usage.
+    //
+    // Buffer exists to handle scaling that may happen due to a prior scheduler's approval.
+    // If scaling is disabled, we don't have to worry about this, and if there's an ongoing
+    // migration, scaling is forbidden.
     migrating := util.TryPodOwnerVirtualMachineMigration(pod) != nil
-    if !vmInfo.Config.ScalingEnabled || migrating {
+    if !vmInfo.Config.ScalingEnabled || migrating || !opts.includeBuffer {
         cpuState.Buffer = 0
         cpuState.Reserved = vmInfo.Using().VCPU
         memState.Buffer = 0
@@ -769,7 +784,7 @@ func (e *AutoscaleEnforcer) reserveResources(
         }
     }

-    if allowDeny {
+    if opts.allowDeny {
         logger.Info("Allowing reserve resources for Pod", zap.Object("verdict", verdict))
     } else if shouldDeny /* but couldn't */ {
         logger.Warn("Reserved resources for Pod above totals", zap.Object("verdict", verdict))
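As a rough illustration of the branch changed above: when scaling is disabled, the VM is migrating, or the caller passed includeBuffer: false, the reservation tracks current usage exactly and Buffer stays zero; otherwise some headroom is kept as Buffer until the autoscaler-agent's next contact resolves it. The sketch below uses simplified, hypothetical types, and its headroom argument is only a placeholder for whatever amount the plugin would actually set aside.

package main

import "fmt"

// resourceState is a simplified stand-in for the plugin's per-pod CPU/memory state.
type resourceState struct {
    Reserved uint
    Buffer   uint
}

// reserve mirrors the condition above: be precise (no buffer) when scaling is
// disabled, the VM is migrating, or the caller opted out via includeBuffer.
// headroom only stands in for whatever extra the plugin would otherwise reserve
// for scaling that a prior scheduler may already have approved.
func reserve(using, headroom uint, scalingEnabled, migrating, includeBuffer bool) resourceState {
    if !scalingEnabled || migrating || !includeBuffer {
        return resourceState{Reserved: using, Buffer: 0}
    }
    return resourceState{Reserved: using + headroom, Buffer: headroom}
}

func main() {
    fmt.Println(reserve(2, 2, true, false, true))  // {4 2}: buffer kept for possible prior approval
    fmt.Println(reserve(2, 2, true, false, false)) // {2 0}: caller opted out, track usage exactly
    fmt.Println(reserve(2, 2, false, false, true)) // {2 0}: scaling disabled, be precise
}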
