Skip to content

Commit

Permalink
Improved multi-numa alignment in Topology Manager: implement closest …
Browse files Browse the repository at this point in the history
…numa policy

Signed-off-by: PiotrProkop <pprokop@nvidia.com>
  • Loading branch information
PiotrProkop committed Nov 3, 2022
1 parent d5dd42d commit 75bb437
Show file tree
Hide file tree
Showing 12 changed files with 438 additions and 114 deletions.
1 change: 1 addition & 0 deletions pkg/kubelet/cm/container_manager_linux.go
Expand Up @@ -288,6 +288,7 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
machineInfo.Topology,
nodeConfig.ExperimentalTopologyManagerPolicy,
nodeConfig.ExperimentalTopologyManagerScope,
nodeConfig.ExperimentalTopologyManagerPolicyOptions,
)

if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion pkg/kubelet/cm/cpumanager/policy_options_test.go
Expand Up @@ -162,7 +162,7 @@ func TestValidateStaticPolicyOptions(t *testing.T) {
t.Run(testCase.description, func(t *testing.T) {
topoMgrPolicy := topologymanager.NewNonePolicy()
if testCase.topoMgrPolicy == topologymanager.PolicySingleNumaNode {
topoMgrPolicy = topologymanager.NewSingleNumaNodePolicy(nil)
topoMgrPolicy, _ = topologymanager.NewSingleNumaNodePolicy(&topologymanager.NUMAInfo{}, map[string]string{})

}
topoMgrStore := topologymanager.NewFakeManagerWithPolicy(topoMgrPolicy)
Expand Down
113 changes: 70 additions & 43 deletions pkg/kubelet/cm/topologymanager/policy.go
Expand Up @@ -33,11 +33,10 @@ type Policy interface {
// Merge a TopologyHints permutation to a single hint by performing a bitwise-AND
// of their affinity masks. The hint shall be preferred if all hints in the permutation
// are preferred.
func mergePermutation(numaNodes []int, permutation []TopologyHint) TopologyHint {
func mergePermutation(defaultAffinity bitmask.BitMask, permutation []TopologyHint) TopologyHint {
// Get the NUMANodeAffinity from each hint in the permutation and see if any
// of them encode unpreferred allocations.
preferred := true
defaultAffinity, _ := bitmask.NewBitMask(numaNodes...)
var numaAffinities []bitmask.BitMask
for _, hint := range permutation {
// Only consider hints that have an actual NUMANodeAffinity set.
Expand Down Expand Up @@ -127,7 +126,50 @@ func maxOfMinAffinityCounts(filteredHints [][]TopologyHint) int {
return maxOfMinCount
}

func compareHints(bestNonPreferredAffinityCount int, current *TopologyHint, candidate *TopologyHint) *TopologyHint {
// HintMerger carries the state used to reduce a set of topology hints to a
// single best hint.
type HintMerger struct {
	// NUMAInfo is the NUMA description consulted while merging.
	NUMAInfo *NUMAInfo

	// Hints holds the hints to be merged (presumably one inner slice per
	// hint provider; confirm against the caller that filters them).
	Hints [][]TopologyHint

	// Set bestNonPreferredAffinityCount to help decide which affinity mask is
	// preferred amongst all non-preferred hints. We calculate this value as
	// the maximum of the minimum affinity counts supplied for any given hint
	// provider. In other words, prefer a hint that has an affinity mask that
	// includes all of the NUMA nodes from the provider that requires the most
	// NUMA nodes to satisfy its allocation.
	BestNonPreferredAffinityCount int

	// CompareNUMAAffinityMasks picks the better of two hints based on their
	// affinity masks. The current best hint is passed first and the candidate
	// second; the parameter names here follow that calling order. Parameter
	// names are not part of a Go function type, so any function with this
	// shape remains assignable.
	CompareNUMAAffinityMasks func(current *TopologyHint, candidate *TopologyHint) (best *TopologyHint)
}

// NewHintMerger builds a HintMerger for the given NUMA description and hints.
//
// The returned merger's affinity-mask comparator depends on the policy: when
// policyName is not PolicySingleNumaNode and opts.PreferClosestNUMA is set,
// masks are ranked with numaInfo.Closest; in every other case they are ranked
// with numaInfo.Narrowest. BestNonPreferredAffinityCount is precomputed from
// hints via maxOfMinAffinityCounts.
func NewHintMerger(numaInfo *NUMAInfo, hints [][]TopologyHint, policyName string, opts PolicyOptions) HintMerger {
	// The choice of ranking strategy does not change between comparisons,
	// so evaluate it once up front.
	preferClosest := policyName != PolicySingleNumaNode && opts.PreferClosestNUMA

	pickHint := func(current, candidate *TopologyHint) *TopologyHint {
		// Identical masks: there is nothing to gain by switching hints.
		if candidate.NUMANodeAffinity.IsEqual(current.NUMANodeAffinity) {
			return current
		}

		// Rank the two masks according to the configured strategy.
		var winner bitmask.BitMask
		if preferClosest {
			winner = numaInfo.Closest(current.NUMANodeAffinity, candidate.NUMANodeAffinity)
		} else {
			winner = numaInfo.Narrowest(current.NUMANodeAffinity, candidate.NUMANodeAffinity)
		}

		// Switch to the candidate only if the winning mask is not the
		// current one.
		if !winner.IsEqual(current.NUMANodeAffinity) {
			return candidate
		}
		return current
	}

	return HintMerger{
		NUMAInfo:                      numaInfo,
		Hints:                         hints,
		BestNonPreferredAffinityCount: maxOfMinAffinityCounts(hints),
		CompareNUMAAffinityMasks:      pickHint,
	}
}

func (m HintMerger) compare(current *TopologyHint, candidate *TopologyHint) *TopologyHint {
// Only consider candidates that result in a NUMANodeAffinity > 0 to
// replace the current bestHint.
if candidate.NUMANodeAffinity.Count() == 0 {
Expand All @@ -146,20 +188,18 @@ func compareHints(bestNonPreferredAffinityCount int, current *TopologyHint, cand
}

// If the current bestHint is preferred and the candidate hint is
// non-preferred, never update the bestHint, regardless of the
// candidate hint's narowness.
// non-preferred, never update the bestHint, regardless of how
// the candidate hint's affinity mask compares to the current
// hint's affinity mask.
if current.Preferred && !candidate.Preferred {
return current
}

// If the current bestHint and the candidate hint are both preferred,
// then only consider candidate hints that have a narrower
// NUMANodeAffinity than the NUMANodeAffinity in the current bestHint.
// then only switch to the candidate if it has a fitter NUMANodeAffinity.
if current.Preferred && candidate.Preferred {
if candidate.NUMANodeAffinity.IsNarrowerThan(current.NUMANodeAffinity) {
return candidate
}
return current
return m.CompareNUMAAffinityMasks(current, candidate)

}

// The only case left is if the current best bestHint and the candidate
Expand All @@ -173,13 +213,13 @@ func compareHints(bestNonPreferredAffinityCount int, current *TopologyHint, cand
// 3. current.NUMANodeAffinity.Count() < bestNonPreferredAffinityCount
//
// For case (1), the current bestHint is larger than the
// bestNonPreferredAffinityCount, so updating to any narrower mergeHint
// bestNonPreferredAffinityCount, so updating to a fitter mergeHint
// is preferred over staying where we are.
//
// For case (2), the current bestHint is equal to the
// bestNonPreferredAffinityCount, so we would like to stick with what
// we have *unless* the candidate hint is also equal to
// bestNonPreferredAffinityCount and it is narrower.
// bestNonPreferredAffinityCount and it is fitter.
//
// For case (3), the current bestHint is less than
// bestNonPreferredAffinityCount, so we would like to creep back up to
Expand Down Expand Up @@ -216,33 +256,28 @@ func compareHints(bestNonPreferredAffinityCount int, current *TopologyHint, cand
// the bestNonPreferredAffinityCount.
//
// Finally, for case (3cc), we know that the current bestHint and the
// candidate hint are equal, so we simply choose the narrower of the 2.
// candidate hint are equal, so we simply choose the fitter of the 2.

// Case 1
if current.NUMANodeAffinity.Count() > bestNonPreferredAffinityCount {
if candidate.NUMANodeAffinity.IsNarrowerThan(current.NUMANodeAffinity) {
return candidate
}
return current
if current.NUMANodeAffinity.Count() > m.BestNonPreferredAffinityCount {
return m.CompareNUMAAffinityMasks(current, candidate)
}
// Case 2
if current.NUMANodeAffinity.Count() == bestNonPreferredAffinityCount {
if candidate.NUMANodeAffinity.Count() != bestNonPreferredAffinityCount {
if current.NUMANodeAffinity.Count() == m.BestNonPreferredAffinityCount {
if candidate.NUMANodeAffinity.Count() != m.BestNonPreferredAffinityCount {
return current
}
if candidate.NUMANodeAffinity.IsNarrowerThan(current.NUMANodeAffinity) {
return candidate
}
return current
return m.CompareNUMAAffinityMasks(current, candidate)
}
// Case 3a
if candidate.NUMANodeAffinity.Count() > bestNonPreferredAffinityCount {
if candidate.NUMANodeAffinity.Count() > m.BestNonPreferredAffinityCount {
return current
}
// Case 3b
if candidate.NUMANodeAffinity.Count() == bestNonPreferredAffinityCount {
if candidate.NUMANodeAffinity.Count() == m.BestNonPreferredAffinityCount {
return candidate
}

// Case 3ca
if candidate.NUMANodeAffinity.Count() > current.NUMANodeAffinity.Count() {
return candidate
Expand All @@ -251,35 +286,27 @@ func compareHints(bestNonPreferredAffinityCount int, current *TopologyHint, cand
if candidate.NUMANodeAffinity.Count() < current.NUMANodeAffinity.Count() {
return current
}

// Case 3cc
if candidate.NUMANodeAffinity.IsNarrowerThan(current.NUMANodeAffinity) {
return candidate
}
return current
return m.CompareNUMAAffinityMasks(current, candidate)

}

func mergeFilteredHints(numaNodes []int, filteredHints [][]TopologyHint) TopologyHint {
// Set bestNonPreferredAffinityCount to help decide which affinity mask is
// preferred amongst all non-preferred hints. We calculate this value as
// the maximum of the minimum affinity counts supplied for any given hint
// provider. In other words, prefer a hint that has an affinity mask that
// includes all of the NUMA nodes from the provider that requires the most
// NUMA nodes to satisfy its allocation.
bestNonPreferredAffinityCount := maxOfMinAffinityCounts(filteredHints)
func (m HintMerger) Merge() TopologyHint {
defaultAffinity := m.NUMAInfo.DefaultAffinityMask()

var bestHint *TopologyHint
iterateAllProviderTopologyHints(filteredHints, func(permutation []TopologyHint) {
iterateAllProviderTopologyHints(m.Hints, func(permutation []TopologyHint) {
// Get the NUMANodeAffinity from each hint in the permutation and see if any
// of them encode unpreferred allocations.
mergedHint := mergePermutation(numaNodes, permutation)
mergedHint := mergePermutation(defaultAffinity, permutation)

// Compare the current bestHint with the candidate mergedHint and
// update bestHint if appropriate.
bestHint = compareHints(bestNonPreferredAffinityCount, bestHint, &mergedHint)
bestHint = m.compare(bestHint, &mergedHint)
})

if bestHint == nil {
defaultAffinity, _ := bitmask.NewBitMask(numaNodes...)
bestHint = &TopologyHint{defaultAffinity, false}
}

Expand Down
19 changes: 13 additions & 6 deletions pkg/kubelet/cm/topologymanager/policy_best_effort.go
Expand Up @@ -17,8 +17,9 @@ limitations under the License.
package topologymanager

type bestEffortPolicy struct {
//List of NUMA Nodes available on the underlying machine
numaNodes []int
// numaInfo represents list of NUMA Nodes available on the underlying machine and distances between them
numaInfo *NUMAInfo
opts PolicyOptions
}

var _ Policy = &bestEffortPolicy{}
Expand All @@ -27,8 +28,13 @@ var _ Policy = &bestEffortPolicy{}
const PolicyBestEffort string = "best-effort"

// NewBestEffortPolicy returns best-effort policy.
func NewBestEffortPolicy(numaNodes []int) Policy {
return &bestEffortPolicy{numaNodes: numaNodes}
func NewBestEffortPolicy(numaInfo *NUMAInfo, topologyPolicyOptions map[string]string) (Policy, error) {
	// Convert the raw option map first; any error from NewPolicyOptions is
	// handed back to the caller unchanged and no policy is created.
	parsedOpts, parseErr := NewPolicyOptions(topologyPolicyOptions)
	if parseErr != nil {
		return nil, parseErr
	}

	policy := &bestEffortPolicy{
		numaInfo: numaInfo,
		opts:     parsedOpts,
	}
	return policy, nil
}

func (p *bestEffortPolicy) Name() string {
Expand All @@ -40,8 +46,9 @@ func (p *bestEffortPolicy) canAdmitPodResult(hint *TopologyHint) bool {
}

func (p *bestEffortPolicy) Merge(providersHints []map[string][]TopologyHint) (TopologyHint, bool) {
filteredProvidersHints := filterProvidersHints(providersHints)
bestHint := mergeFilteredHints(p.numaNodes, filteredProvidersHints)
filteredHints := filterProvidersHints(providersHints)
merger := NewHintMerger(p.numaInfo, filteredHints, p.Name(), p.opts)
bestHint := merger.Merge()
admit := p.canAdmitPodResult(&bestHint)
return bestHint, admit
}
29 changes: 22 additions & 7 deletions pkg/kubelet/cm/topologymanager/policy_best_effort_test.go
Expand Up @@ -39,9 +39,9 @@ func TestPolicyBestEffortCanAdmitPodResult(t *testing.T) {
}

for _, tc := range tcases {
numaNodes := []int{0, 1}
policy := NewBestEffortPolicy(numaNodes)
result := policy.(*bestEffortPolicy).canAdmitPodResult(&tc.hint)
numaInfo := commonNUMAInfoTwoNodes()
policy := &bestEffortPolicy{numaInfo: numaInfo}
result := policy.canAdmitPodResult(&tc.hint)

if result != tc.expected {
t.Errorf("Expected result to be %t, got %t", tc.expected, result)
Expand All @@ -50,11 +50,26 @@ func TestPolicyBestEffortCanAdmitPodResult(t *testing.T) {
}

func TestPolicyBestEffortMerge(t *testing.T) {
numaNodes := []int{0, 1, 2, 3}
policy := NewBestEffortPolicy(numaNodes)
numaInfo := commonNUMAInfoFourNodes()
policy := &bestEffortPolicy{numaInfo: numaInfo}

tcases := commonPolicyMergeTestCases(numaNodes)
tcases = append(tcases, policy.(*bestEffortPolicy).mergeTestCases(numaNodes)...)
tcases := commonPolicyMergeTestCases(numaInfo.Nodes)
tcases = append(tcases, policy.mergeTestCases(numaInfo.Nodes)...)
tcases = append(tcases, policy.mergeTestCasesNoPolicies(numaInfo.Nodes)...)

testPolicyMerge(policy, tcases, t)
}

// TestPolicyBestEffortMergeClosestNUMA runs the shared, best-effort and
// closest-NUMA merge test cases against a best-effort policy that has
// PreferClosestNUMA enabled, using the eight-node NUMA fixture.
func TestPolicyBestEffortMergeClosestNUMA(t *testing.T) {
	numaInfo := commonNUMAInfoEightNodes()
	policy := &bestEffortPolicy{
		numaInfo: numaInfo,
		opts:     PolicyOptions{PreferClosestNUMA: true},
	}
	nodes := numaInfo.Nodes

	// Case order is preserved: common cases, then policy-specific cases,
	// then the closest-NUMA cases.
	tcases := append(commonPolicyMergeTestCases(nodes), policy.mergeTestCases(nodes)...)
	tcases = append(tcases, policy.mergeTestCasesClosestNUMA(nodes)...)

	testPolicyMerge(policy, tcases, t)
}
16 changes: 11 additions & 5 deletions pkg/kubelet/cm/topologymanager/policy_restricted.go
Expand Up @@ -26,8 +26,13 @@ var _ Policy = &restrictedPolicy{}
const PolicyRestricted string = "restricted"

// NewRestrictedPolicy returns restricted policy.
func NewRestrictedPolicy(numaNodes []int) Policy {
return &restrictedPolicy{bestEffortPolicy{numaNodes: numaNodes}}
func NewRestrictedPolicy(numaInfo *NUMAInfo, topologyPolicyOptions map[string]string) (Policy, error) {
	// Convert the raw option map first; any error from NewPolicyOptions is
	// handed back to the caller unchanged and no policy is created.
	parsedOpts, parseErr := NewPolicyOptions(topologyPolicyOptions)
	if parseErr != nil {
		return nil, parseErr
	}

	// The restricted policy wraps a bestEffortPolicy value that carries the
	// NUMA description and the parsed options.
	base := bestEffortPolicy{numaInfo: numaInfo, opts: parsedOpts}
	return &restrictedPolicy{base}, nil
}

func (p *restrictedPolicy) Name() string {
Expand All @@ -40,7 +45,8 @@ func (p *restrictedPolicy) canAdmitPodResult(hint *TopologyHint) bool {

func (p *restrictedPolicy) Merge(providersHints []map[string][]TopologyHint) (TopologyHint, bool) {
filteredHints := filterProvidersHints(providersHints)
hint := mergeFilteredHints(p.numaNodes, filteredHints)
admit := p.canAdmitPodResult(&hint)
return hint, admit
merger := NewHintMerger(p.numaInfo, filteredHints, p.Name(), p.opts)
bestHint := merger.Merge()
admit := p.canAdmitPodResult(&bestHint)
return bestHint, admit
}
29 changes: 21 additions & 8 deletions pkg/kubelet/cm/topologymanager/policy_restricted_test.go
Expand Up @@ -30,8 +30,9 @@ func TestPolicyRestrictedName(t *testing.T) {
expected: "restricted",
},
}
numaInfo := commonNUMAInfoTwoNodes()
for _, tc := range tcases {
policy := NewRestrictedPolicy([]int{0, 1})
policy := &restrictedPolicy{bestEffortPolicy{numaInfo: numaInfo, opts: PolicyOptions{}}}
if policy.Name() != tc.expected {
t.Errorf("Expected Policy Name to be %s, got %s", tc.expected, policy.Name())
}
Expand All @@ -57,9 +58,9 @@ func TestPolicyRestrictedCanAdmitPodResult(t *testing.T) {
}

for _, tc := range tcases {
numaNodes := []int{0, 1}
policy := NewRestrictedPolicy(numaNodes)
result := policy.(*restrictedPolicy).canAdmitPodResult(&tc.hint)
numaInfo := commonNUMAInfoTwoNodes()
policy := &restrictedPolicy{bestEffortPolicy{numaInfo: numaInfo}}
result := policy.canAdmitPodResult(&tc.hint)

if result != tc.expected {
t.Errorf("Expected result to be %t, got %t", tc.expected, result)
Expand All @@ -68,11 +69,23 @@ func TestPolicyRestrictedCanAdmitPodResult(t *testing.T) {
}

func TestPolicyRestrictedMerge(t *testing.T) {
numaNodes := []int{0, 1, 2, 3}
policy := NewRestrictedPolicy(numaNodes)
numaInfo := commonNUMAInfoFourNodes()
policy := &restrictedPolicy{bestEffortPolicy{numaInfo: numaInfo}}

tcases := commonPolicyMergeTestCases(numaNodes)
tcases = append(tcases, policy.(*restrictedPolicy).mergeTestCases(numaNodes)...)
tcases := commonPolicyMergeTestCases(numaInfo.Nodes)
tcases = append(tcases, policy.mergeTestCases(numaInfo.Nodes)...)
tcases = append(tcases, policy.mergeTestCasesNoPolicies(numaInfo.Nodes)...)

testPolicyMerge(policy, tcases, t)
}

// TestPolicyRestrictedMergeClosestNUMA runs the shared, restricted and
// closest-NUMA merge test cases against a restricted policy that has
// PreferClosestNUMA enabled, using the eight-node NUMA fixture.
func TestPolicyRestrictedMergeClosestNUMA(t *testing.T) {
	numaInfo := commonNUMAInfoEightNodes()
	opts := PolicyOptions{
		PreferClosestNUMA: true,
	}
	policy := &restrictedPolicy{bestEffortPolicy{numaInfo: numaInfo, opts: opts}}
	nodes := numaInfo.Nodes

	// Case order is preserved: common cases, then policy-specific cases,
	// then the closest-NUMA cases.
	tcases := append(commonPolicyMergeTestCases(nodes), policy.mergeTestCases(nodes)...)
	tcases = append(tcases, policy.mergeTestCasesClosestNUMA(nodes)...)

	testPolicyMerge(policy, tcases, t)
}

0 comments on commit 75bb437

Please sign in to comment.