Skip to content

Commit

Permalink
koord-scheduler: support Node CPU orchestration API (#360)
Browse files Browse the repository at this point in the history
- support cpu bind policy with FullPCPUsOnly
- support NUMA allocate strategy with MostAllocated/LeastAllocated

Signed-off-by: Joseph <joseph.t.lee@outlook.com>
  • Loading branch information
eahydra committed Jul 14, 2022
1 parent 78a4ebb commit 8179245
Show file tree
Hide file tree
Showing 9 changed files with 190 additions and 41 deletions.
18 changes: 18 additions & 0 deletions apis/extension/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import (
"encoding/json"

"k8s.io/apimachinery/pkg/types"

schedulingconfig "github.com/koordinator-sh/koordinator/apis/scheduling/config"
)

const (
Expand All @@ -30,6 +32,22 @@ const (
// AnnotationNodeCPUSharedPools describes the CPU Shared Pool defined by Koordinator.
// The shared pool is mainly used by Koordinator LS Pods or K8s Burstable Pods.
AnnotationNodeCPUSharedPools = NodeDomainPrefix + "/cpu-shared-pools"

// LabelNodeCPUBindPolicy constrains how logical CPUs are bound when scheduling.
LabelNodeCPUBindPolicy = NodeDomainPrefix + "/cpu-bind-policy"
// LabelNodeNUMAAllocateStrategy indicates how to choose satisfied NUMA Nodes when scheduling.
LabelNodeNUMAAllocateStrategy = NodeDomainPrefix + "/numa-allocate-strategy"
)

const (
// NodeCPUBindPolicyFullPCPUsOnly is a value of the LabelNodeCPUBindPolicy node label.
// It requires the scheduler to allocate only full physical cores on the node.
// Equivalent to kubelet CPU manager policy option full-pcpus-only=true.
NodeCPUBindPolicyFullPCPUsOnly = "FullPCPUsOnly"
)

// Values for the LabelNodeNUMAAllocateStrategy node label. Each one is the
// string form of the corresponding schedulingconfig.NUMAAllocateStrategy
// constant, so a label value converts directly to that type.
const (
// NodeNUMAAllocateStrategyLeastAllocated is the string form of schedulingconfig.NUMALeastAllocated.
NodeNUMAAllocateStrategyLeastAllocated = string(schedulingconfig.NUMALeastAllocated)
// NodeNUMAAllocateStrategyMostAllocated is the string form of schedulingconfig.NUMAMostAllocated.
NodeNUMAAllocateStrategyMostAllocated = string(schedulingconfig.NUMAMostAllocated)
)

type CPUTopology struct {
Expand Down
5 changes: 2 additions & 3 deletions apis/scheduling/config/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,8 @@ type ScoringStrategy struct {
type NodeNUMAResourceArgs struct {
metav1.TypeMeta

DefaultCPUBindPolicy CPUBindPolicy `json:"defaultCPUBindPolicy,omitempty"`
NUMAAllocateStrategy NUMAAllocateStrategy `json:"numaAllocateStrategy,omitempty"`
ScoringStrategy *ScoringStrategy `json:"scoringStrategy,omitempty"`
DefaultCPUBindPolicy CPUBindPolicy `json:"defaultCPUBindPolicy,omitempty"`
ScoringStrategy *ScoringStrategy `json:"scoringStrategy,omitempty"`
}

// CPUBindPolicy defines the CPU binding policy
Expand Down
4 changes: 0 additions & 4 deletions apis/scheduling/config/v1beta2/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ var (
}

defaultPreferredCPUBindPolicy = CPUBindPolicyFullPCPUs
defaultNUMAAllocateStrategy = NUMAMostAllocated
defaultNodeNUMAResourceScoringStrategy = &ScoringStrategy{
Type: MostAllocated,
Resources: []schedconfig.ResourceSpec{
Expand Down Expand Up @@ -77,9 +76,6 @@ func SetDefaults_NodeNUMAResourceArgs(obj *NodeNUMAResourceArgs) {
if obj.DefaultCPUBindPolicy == "" {
obj.DefaultCPUBindPolicy = defaultPreferredCPUBindPolicy
}
if obj.NUMAAllocateStrategy == "" {
obj.NUMAAllocateStrategy = defaultNUMAAllocateStrategy
}
if obj.ScoringStrategy == nil {
obj.ScoringStrategy = defaultNodeNUMAResourceScoringStrategy
}
Expand Down
5 changes: 2 additions & 3 deletions apis/scheduling/config/v1beta2/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,8 @@ type ScoringStrategy struct {
type NodeNUMAResourceArgs struct {
metav1.TypeMeta

DefaultCPUBindPolicy CPUBindPolicy `json:"defaultCPUBindPolicy,omitempty"`
NUMAAllocateStrategy NUMAAllocateStrategy `json:"numaAllocateStrategy,omitempty"`
ScoringStrategy *ScoringStrategy `json:"scoringStrategy,omitempty"`
DefaultCPUBindPolicy CPUBindPolicy `json:"defaultCPUBindPolicy,omitempty"`
ScoringStrategy *ScoringStrategy `json:"scoringStrategy,omitempty"`
}

// CPUBindPolicy defines the CPU binding policy
Expand Down
2 changes: 0 additions & 2 deletions apis/scheduling/config/v1beta2/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,6 @@ type CPUOrchestrationPluginArgs struct {

DefaultCPUBindPolicy CPUBindPolicy `json:"defaultCPUBindPolicy,omitempty"`
NUMATopologyAlignmentPolicy NUMATopologyAlignmentPolicy `json:"numaTopologyAlignmentPolicy,omitempty"`
NUMAAllocateStrategy NUMAAllocateStrategy `json:"numaAllocateStrategy,omitempty"`

ScoringStrategy ScoringStrategy `json:"scoringStrategy,omitempty"`
}
Expand Down Expand Up @@ -678,7 +677,6 @@ type ScoringStrategy struct {

- `DefaultCPUBindPolicy` represents the default bind policy. If not set, use `FullPCPUs` as default value.
- `NUMATopologyAlignmentPolicy` represents the default NUMA topology alignment policy. If not set, use `BestEffort` as default value.
- `NUMAAllocateStrategy` represents the default NUMA allocate strategy. If not set, use `MostAllocated` as default value.
- `ScoringStrategy` represents the node resource scoring strategy. If not set, use `MostAllocated` as default value.

## Alternatives
Expand Down
4 changes: 2 additions & 2 deletions pkg/scheduler/plugins/nodenumaresource/cpu_allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ func newCPUAccumulator(
allocatedCPUs CPUDetails,
numCPUsNeeded int,
exclusivePolicy schedulingconfig.CPUExclusivePolicy,
numaSortStrategy schedulingconfig.NUMAAllocateStrategy,
numaAllocateStrategy schedulingconfig.NUMAAllocateStrategy,
) *cpuAccumulator {
exclusiveInCores := sets.NewInt()
exclusiveInNUMANodes := sets.NewInt()
Expand All @@ -225,7 +225,7 @@ func newCPUAccumulator(
exclusive: exclusive,
exclusivePolicy: exclusivePolicy,
numCPUsNeeded: numCPUsNeeded,
numaAllocateStrategy: numaSortStrategy,
numaAllocateStrategy: numaAllocateStrategy,
result: NewCPUSet(),
}
}
Expand Down
43 changes: 38 additions & 5 deletions pkg/scheduler/plugins/nodenumaresource/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ const (
// Reason messages attached to the framework.Status this plugin returns when
// it rejects a node.
const (
// ErrMissingNodeResourceTopology reports that the node has no NodeResourceTopology.
ErrMissingNodeResourceTopology = "node(s) missing NodeResourceTopology"
// ErrInvalidCPUTopology reports that the node's CPU topology is not valid.
ErrInvalidCPUTopology = "node(s) invalid CPU Topology"
// ErrSMTAlignmentError is returned by Filter for a node labeled with the
// FullPCPUsOnly CPU bind policy when the number of requested CPUs is not a
// multiple of the node's CPUs per core.
ErrSMTAlignmentError = "node(s) requested cpus not multiple cpus per core"
// ErrRequiredFullPCPUsPolicy is returned by Filter for a node labeled with the
// FullPCPUsOnly CPU bind policy when the pod's preferred CPU bind policy is
// not FullPCPUs.
ErrRequiredFullPCPUsPolicy = "node(s) required FullPCPUs policy"
)

var (
Expand Down Expand Up @@ -201,6 +203,15 @@ func (p *Plugin) Filter(ctx context.Context, cycleState *framework.CycleState, p
return framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrInvalidCPUTopology)
}

if node.Labels[extension.LabelNodeCPUBindPolicy] == extension.NodeCPUBindPolicyFullPCPUsOnly {
if state.numCPUsNeeded%numaInfo.cpuTopology.CPUsPerCore() != 0 {
return framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrSMTAlignmentError)
}
if state.preferredCPUBindPolicy != schedulingconfig.CPUBindPolicyFullPCPUs {
return framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrRequiredFullPCPUsPolicy)
}
}

return nil
}

Expand Down Expand Up @@ -234,19 +245,31 @@ func (p *Plugin) Score(ctx context.Context, cycleState *framework.CycleState, po
return 0, nil
}

score := p.calcScore(state.numCPUsNeeded, state.preferredCPUBindPolicy, state.preferredCPUExclusivePolicy, numaInfo)
numaAllocateStrategy := p.getNUMAAllocateStrategy(node)
score := p.calcScore(numaInfo, state.numCPUsNeeded, state.preferredCPUBindPolicy, state.preferredCPUExclusivePolicy, numaAllocateStrategy)
return score, nil
}

func (p *Plugin) calcScore(numCPUsNeeded int, cpuBindPolicy schedulingconfig.CPUBindPolicy, cpuExclusivePolicy schedulingconfig.CPUExclusivePolicy, numaInfo *nodeNUMAInfo) int64 {
// getNUMAAllocateStrategy resolves the NUMA allocate strategy to use for node.
//
// Precedence, highest first:
//  1. a non-empty LabelNodeNUMAAllocateStrategy label on the node, converted
//     as-is (the value is not validated here);
//  2. NUMALeastAllocated when the plugin's ScoringStrategy type is LeastAllocated;
//  3. NUMAMostAllocated otherwise.
func (p *Plugin) getNUMAAllocateStrategy(node *corev1.Node) schedulingconfig.NUMAAllocateStrategy {
	// Derive the plugin-level fallback first; the node label may override it below.
	fallback := schedulingconfig.NUMAMostAllocated
	if scoring := p.pluginArgs.ScoringStrategy; scoring != nil && scoring.Type == schedulingconfig.LeastAllocated {
		fallback = schedulingconfig.NUMALeastAllocated
	}

	// A missing label (or nil label map) yields "", which keeps the fallback.
	if fromLabel := node.Labels[extension.LabelNodeNUMAAllocateStrategy]; fromLabel != "" {
		return schedulingconfig.NUMAAllocateStrategy(fromLabel)
	}
	return fallback
}

func (p *Plugin) calcScore(numaInfo *nodeNUMAInfo, numCPUsNeeded int, cpuBindPolicy schedulingconfig.CPUBindPolicy, cpuExclusivePolicy schedulingconfig.CPUExclusivePolicy, numaAllocateStrategy schedulingconfig.NUMAAllocateStrategy) int64 {
availableCPUs, allocated := getAvailableCPUsFunc(numaInfo)
acc := newCPUAccumulator(
numaInfo.cpuTopology,
availableCPUs,
allocated,
numCPUsNeeded,
cpuExclusivePolicy,
p.pluginArgs.NUMAAllocateStrategy,
numaAllocateStrategy,
)

var freeCPUs [][]int
Expand All @@ -265,7 +288,7 @@ func (p *Plugin) calcScore(numCPUsNeeded int, cpuBindPolicy schedulingconfig.CPU
}

scoreFn := mostRequestedScore
if p.pluginArgs.ScoringStrategy != nil && p.pluginArgs.ScoringStrategy.Type == schedulingconfig.LeastAllocated {
if numaAllocateStrategy == schedulingconfig.NUMALeastAllocated {
scoreFn = leastRequestedScore
}

Expand Down Expand Up @@ -335,6 +358,15 @@ func (p *Plugin) Reserve(ctx context.Context, cycleState *framework.CycleState,
return nil
}

nodeInfo, err := p.handle.SnapshotSharedLister().NodeInfos().Get(nodeName)
if err != nil {
return framework.NewStatus(framework.Error, fmt.Sprintf("getting node %q from Snapshot: %v", nodeName, err))
}
node := nodeInfo.Node()
if node == nil {
return framework.NewStatus(framework.Error, "node not found")
}

// The Pod requires the CPU to be allocated according to CPUBindPolicy,
// but the current node does not have a NodeResourceTopology or a valid CPUTopology,
// so this error should be exposed to the user
Expand All @@ -350,14 +382,15 @@ func (p *Plugin) Reserve(ctx context.Context, cycleState *framework.CycleState,
}

availableCPUs, allocated := getAvailableCPUsFunc(numaInfo)
numaAllocateStrategy := p.getNUMAAllocateStrategy(node)
result, err := takeCPUs(
numaInfo.cpuTopology,
availableCPUs,
allocated,
state.numCPUsNeeded,
state.preferredCPUBindPolicy,
state.resourceSpec.PreferredCPUExclusivePolicy,
p.pluginArgs.NUMAAllocateStrategy,
numaAllocateStrategy,
)
if err != nil {
return framework.NewStatus(framework.Error, err.Error())
Expand Down

0 comments on commit 8179245

Please sign in to comment.