From 4c75be0604f3bb0853209018dcc5c0847d39775f Mon Sep 17 00:00:00 2001
From: Artyom Lukianov
Date: Thu, 5 Mar 2020 13:25:46 +0200
Subject: [PATCH 01/31] memory manager: provide the skeleton for the memory
 manager

Provide the memory manager struct and the methods that should be implemented.

Signed-off-by: Artyom Lukianov
---
 .../cm/memorymanager/memory_manager.go        | 151 ++++++++++++++++++
 1 file changed, 151 insertions(+)
 create mode 100644 pkg/kubelet/cm/memorymanager/memory_manager.go

diff --git a/pkg/kubelet/cm/memorymanager/memory_manager.go b/pkg/kubelet/cm/memorymanager/memory_manager.go
new file mode 100644
index 000000000000..1c2f81d32f5b
--- /dev/null
+++ b/pkg/kubelet/cm/memorymanager/memory_manager.go
@@ -0,0 +1,151 @@
+/*
+Copyright 2020 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package memorymanager
+
+import (
+	"sync"
+	"time"
+
+	cadvisorapi "github.com/google/cadvisor/info/v1"
+
+	v1 "k8s.io/api/core/v1"
+	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+	"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
+	"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state"
+	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
+	"k8s.io/kubernetes/pkg/kubelet/config"
+	"k8s.io/kubernetes/pkg/kubelet/status"
+)
+
+// memoryManagerStateFileName is the file name where memory manager stores its state
+const memoryManagerStateFileName = "memory_manager_state"
+
+// ActivePodsFunc is a function that returns a list of pods to reconcile.
+type ActivePodsFunc func() []*v1.Pod
+
+type runtimeService interface {
+	UpdateContainerResources(id string, resources *runtimeapi.LinuxContainerResources) error
+}
+
+type sourcesReadyStub struct{}
+
+func (s *sourcesReadyStub) AddSource(source string) {}
+func (s *sourcesReadyStub) AllReady() bool { return true }
+
+// Manager interface provides methods for Kubelet to manage pod memory.
+type Manager interface {
+	// Start is called during Kubelet initialization.
+	Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService, initialContainers containermap.ContainerMap) error
+
+	// AddContainer is called between container create and container start
+	// so that initial memory affinity settings can be written through to the
+	// container runtime before the first process begins to execute.
+	AddContainer(p *v1.Pod, c *v1.Container, containerID string) error
+
+	// Allocate is called to pre-allocate memory resources during Pod admission.
+	// This must be called prior to the AddContainer() call for a container, e.g. at pod admission time.
+	Allocate(pod *v1.Pod, container *v1.Container) error
+
+	// RemoveContainer is called after Kubelet decides to kill or delete a
+	// container. After this call, the memory manager stops trying to reconcile
+	// that container, and any memory allocated to the container is freed.
+	RemoveContainer(containerID string) error
+
+	// State returns a read-only interface to the internal memory manager state.
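+	// The returned Reader gives read-only access to the machine state and the current memory assignments.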
+	State() state.Reader
+
+	// GetTopologyHints implements the topologymanager.HintProvider Interface
+	// and is consulted to achieve NUMA aware resource alignment among this
+	// and other resource controllers.
+	GetTopologyHints(*v1.Pod, *v1.Container) map[string][]topologymanager.TopologyHint
+
+	// GetPodTopologyHints implements the topologymanager.HintProvider Interface
+	// and is consulted to achieve NUMA aware resource alignment among this
+	// and other resource controllers.
+	GetPodTopologyHints(*v1.Pod) map[string][]topologymanager.TopologyHint
+}
+
+type manager struct {
+	sync.Mutex
+	policy Policy
+
+	// state allows restoring information about memory allocation for guaranteed pods
+	// in the case of a kubelet restart
+	state state.State
+
+	// containerRuntime is the container runtime service interface needed
+	// to make UpdateContainerResources() calls against the containers.
+	containerRuntime runtimeService
+
+	// activePods is a method for listing active pods on the node
+	// so all the containers can be updated in the reconciliation loop.
+	activePods ActivePodsFunc
+
+	// podStatusProvider provides a method for obtaining pod statuses
+	// and the containerID of their containers
+	podStatusProvider status.PodStatusProvider
+
+	// containerMap provides a mapping from (pod, container) -> containerID
+	// for all containers in a pod
+	containerMap containermap.ContainerMap
+
+	nodeAllocatableReservation v1.ResourceList
+
+	// sourcesReady provides the readiness of kubelet configuration sources such as apiserver update readiness.
+	// We use it to determine when we can purge inactive pods from checkpointed state.
+	sourcesReady config.SourcesReady
+
+	// stateFileDirectory holds the directory where the state file for checkpoints is held.
+	stateFileDirectory string
+}
+
+var _ Manager = &manager{}
+
+// NewManager returns new instance of the memory manager
+func NewManager(reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store) (Manager, error) {
+
+}
+
+// Start starts the memory manager reconcile loop under the kubelet to keep state updated
+func (m *manager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService, initialContainers containermap.ContainerMap) error {
+
+}
+
+// AddContainer saves the value of requested memory for the guaranteed pod under the state and sets the memory affinity according to the topology manager
+func (m *manager) AddContainer(p *v1.Pod, c *v1.Container, containerID string) error {
+
+}
+
+// Allocate is called to pre-allocate memory resources during Pod admission.
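+// A returned error is intended to fail the pod admission, so no container of the pod starts without its memory reserved.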
+func (m *manager) Allocate(pod *v1.Pod, container *v1.Container) error {
+
+}
+
+// RemoveContainer removes the container from the state
+func (m *manager) RemoveContainer(containerID string) error {
+
+}
+
+// State returns the state of the manager
+func (m *manager) State() state.Reader {
+	return m.state
+}
+
+// GetTopologyHints returns the topology hints for the topology manager
+func (m *manager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {

+}

From 48ca6e53e6ffcb9cdaacb3fd2b1575575e04b129 Mon Sep 17 00:00:00 2001
From: Artyom Lukianov
Date: Tue, 10 Mar 2020 15:36:16 +0200
Subject: [PATCH 02/31] memory manager: provide and use the checkpoint manager

The checkpoint manager provides a way to save the memory manager
`MemoryTable` both in memory and in the state file. Saving the
`MemoryTable` in the state file is useful when the kubelet restarts
and needs to restore memory allocations for running containers.
Also, it provides a way to monitor memory allocations done by the
memory manager, and in the future, the state file content can be
exposed under the pod metrics.

Signed-off-by: Artyom Lukianov
---
 pkg/kubelet/cm/memorymanager/state/BUILD      |  57 ++++++
 .../cm/memorymanager/state/checkpoint.go      |  65 ++++++
 pkg/kubelet/cm/memorymanager/state/state.go   | 130 ++++++++++++
 .../memorymanager/state/state_checkpoint.go   | 187 ++++++++++++++++++
 .../cm/memorymanager/state/state_mem.go       | 123 ++++++++++++
 5 files changed, 562 insertions(+)
 create mode 100644 pkg/kubelet/cm/memorymanager/state/BUILD
 create mode 100644 pkg/kubelet/cm/memorymanager/state/checkpoint.go
 create mode 100644 pkg/kubelet/cm/memorymanager/state/state.go
 create mode 100644 pkg/kubelet/cm/memorymanager/state/state_checkpoint.go
 create mode 100644 pkg/kubelet/cm/memorymanager/state/state_mem.go

diff --git a/pkg/kubelet/cm/memorymanager/state/BUILD b/pkg/kubelet/cm/memorymanager/state/BUILD
new file mode 100644
index 000000000000..438737c9d181
--- /dev/null
+++ b/pkg/kubelet/cm/memorymanager/state/BUILD
@@ -0,0 +1,57 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+    name = "go_default_library",
+    srcs = [
+        "checkpoint.go",
+        "state.go",
+        "state_checkpoint.go",
+        "state_file.go",
+        "state_mem.go",
+    ],
+    importpath = "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state",
+    visibility = ["//visibility:public"],
+    deps = [
+        "//pkg/kubelet/checkpointmanager:go_default_library",
+        "//pkg/kubelet/checkpointmanager/checksum:go_default_library",
+        "//pkg/kubelet/checkpointmanager/errors:go_default_library",
+        "//pkg/kubelet/cm/containermap:go_default_library",
+        "//staging/src/k8s.io/api/core/v1:go_default_library",
+        "//vendor/github.com/davecgh/go-spew/spew:go_default_library",
+        "//vendor/k8s.io/klog/v2:go_default_library",
+    ],
+)
+
+go_test(
+    name = "go_default_test",
+    srcs = [
+        "state_checkpoint_test.go",
+        "state_compatibility_test.go",
+        "state_file_test.go",
+    ],
+    embed = [":go_default_library"],
+    deps = [
+        "//pkg/kubelet/checkpointmanager:go_default_library",
+        "//pkg/kubelet/cm/containermap:go_default_library",
+        "//pkg/kubelet/cm/cpumanager/state/testing:go_default_library",
+        "//staging/src/k8s.io/api/core/v1:go_default_library",
+        "//vendor/github.com/stretchr/testify/assert:go_default_library",
+    ],
+)
+
+filegroup(
+    name = "package-srcs",
+    srcs = glob(["**"]),
+    tags = ["automanaged"],
+    visibility = ["//visibility:private"],
+)
+
+filegroup(
+    name = "all-srcs",
+    srcs = [
+        ":package-srcs",
"//pkg/kubelet/cm/cpumanager/state/testing:all-srcs", + ], + tags = ["automanaged"], + visibility = ["//visibility:public"], +) diff --git a/pkg/kubelet/cm/memorymanager/state/checkpoint.go b/pkg/kubelet/cm/memorymanager/state/checkpoint.go new file mode 100644 index 000000000000..918bbfb67634 --- /dev/null +++ b/pkg/kubelet/cm/memorymanager/state/checkpoint.go @@ -0,0 +1,65 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package state + +import ( + "encoding/json" + + "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" + "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum" +) + +var _ checkpointmanager.Checkpoint = &MemoryManagerCheckpoint{} + +// MemoryManagerCheckpoint struct is used to store memory/pod assignments in a checkpoint +type MemoryManagerCheckpoint struct { + PolicyName string `json:"policyName"` + MachineState NodeMap `json:"machineState"` + Entries ContainerMemoryAssignments `json:"entries,omitempty"` + Checksum checksum.Checksum `json:"checksum"` +} + +// NewMemoryManagerCheckpoint returns an instance of Checkpoint +func NewMemoryManagerCheckpoint() *MemoryManagerCheckpoint { + //lint:ignore unexported-type-in-api user-facing error message + return &MemoryManagerCheckpoint{ + Entries: ContainerMemoryAssignments{}, + MachineState: NodeMap{}, + } +} + +// MarshalCheckpoint returns marshalled checkpoint +func (mp *MemoryManagerCheckpoint) MarshalCheckpoint() ([]byte, error) { + // make sure checksum wasn't set before so it doesn't affect output checksum + mp.Checksum = 0 + mp.Checksum = checksum.New(mp) + return json.Marshal(*mp) +} + +// UnmarshalCheckpoint tries to unmarshal passed bytes to checkpoint +func (mp *MemoryManagerCheckpoint) UnmarshalCheckpoint(blob []byte) error { + return json.Unmarshal(blob, mp) +} + +// VerifyChecksum verifies that current checksum of checkpoint is valid +func (mp *MemoryManagerCheckpoint) VerifyChecksum() error { + ck := mp.Checksum + mp.Checksum = 0 + err := ck.Verify(mp) + mp.Checksum = ck + return err +} diff --git a/pkg/kubelet/cm/memorymanager/state/state.go b/pkg/kubelet/cm/memorymanager/state/state.go new file mode 100644 index 000000000000..00a6f2a87575 --- /dev/null +++ b/pkg/kubelet/cm/memorymanager/state/state.go @@ -0,0 +1,130 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package state
+
+import (
+	v1 "k8s.io/api/core/v1"
+)
+
+// MemoryTable contains memory information
+type MemoryTable struct {
+	TotalMemSize   uint64 `json:"total"`
+	SystemReserved uint64 `json:"systemReserved"`
+	Allocatable    uint64 `json:"allocatable"`
+	Reserved       uint64 `json:"reserved"`
+	Free           uint64 `json:"free"`
+}
+
+// NodeState contains NUMA node related information
+type NodeState struct {
+	// NumberOfAssignments contains the number of memory assignments from this node
+	// When a container requires both memory and hugepages, it increases the number of assignments by two
+	NumberOfAssignments int `json:"numberOfAssignments"`
+	// MemoryMap contains NUMA node memory related information
+	MemoryMap map[v1.ResourceName]*MemoryTable `json:"memoryMap"`
+	// Nodes contains the current NUMA node and all other NUMA nodes that are in a group with the current one
+	// This parameter indicates whether the current node is used for a multiple NUMA node memory allocation
+	// For example, if some container is pinned to NUMA nodes 0,1,2, each of the NUMA nodes 0,1,2 under the state will have
+	// this parameter equal to [0, 1, 2]
+	Nodes []int `json:"nodes"`
+}
+
+// NodeMap contains memory information for each NUMA node.
+type NodeMap map[int]*NodeState
+
+// Clone returns a copy of NodeMap
+func (nm NodeMap) Clone() NodeMap {
+	clone := make(NodeMap)
+	for node, s := range nm {
+		if s == nil {
+			clone[node] = nil
+			continue
+		}
+
+		clone[node] = &NodeState{}
+		clone[node].NumberOfAssignments = s.NumberOfAssignments
+		clone[node].Nodes = append([]int{}, s.Nodes...)
+
+		if s.MemoryMap == nil {
+			continue
+		}
+
+		clone[node].MemoryMap = map[v1.ResourceName]*MemoryTable{}
+		for memoryType, memoryTable := range s.MemoryMap {
+			clone[node].MemoryMap[memoryType] = &MemoryTable{
+				Allocatable:    memoryTable.Allocatable,
+				Free:           memoryTable.Free,
+				Reserved:       memoryTable.Reserved,
+				SystemReserved: memoryTable.SystemReserved,
+				TotalMemSize:   memoryTable.TotalMemSize,
+			}
+		}
+	}
+	return clone
+}
+
+// Block is a data structure used to represent a certain amount of memory
+type Block struct {
+	// NUMAAffinity contains the list of NUMA nodes that the memory block is affined to
+	NUMAAffinity []int           `json:"numaAffinity"`
+	Type         v1.ResourceName `json:"type"`
+	Size         uint64          `json:"size"`
+}
+
+// ContainerMemoryAssignments stores memory assignments of containers
+type ContainerMemoryAssignments map[string]map[string][]Block
+
+// Clone returns a copy of ContainerMemoryAssignments
+func (as ContainerMemoryAssignments) Clone() ContainerMemoryAssignments {
+	clone := make(ContainerMemoryAssignments)
+	for pod := range as {
+		clone[pod] = make(map[string][]Block)
+		for container, blocks := range as[pod] {
+			clone[pod][container] = append([]Block{}, blocks...)
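+			// note: the copied Block values still share the underlying NUMAAffinity slices with the original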
+ } + } + return clone +} + +// Reader interface used to read current memory/pod assignment state +type Reader interface { + // GetMachineState returns Memory Map stored in the State + GetMachineState() NodeMap + // GetMemoryBlocks returns memory assignments of a container + GetMemoryBlocks(podUID string, containerName string) []Block + // GetMemoryAssignments returns ContainerMemoryAssignments + GetMemoryAssignments() ContainerMemoryAssignments +} + +type writer interface { + // SetMachineState stores NodeMap in State + SetMachineState(memoryMap NodeMap) + // SetMemoryBlocks stores memory assignments of a container + SetMemoryBlocks(podUID string, containerName string, blocks []Block) + // SetMemoryAssignments sets ContainerMemoryAssignments by using the passed parameter + SetMemoryAssignments(assignments ContainerMemoryAssignments) + // Delete deletes corresponding Blocks from ContainerMemoryAssignments + Delete(podUID string, containerName string) + // ClearState clears machineState and ContainerMemoryAssignments + ClearState() +} + +// State interface provides methods for tracking and setting memory/pod assignment +type State interface { + Reader + writer +} diff --git a/pkg/kubelet/cm/memorymanager/state/state_checkpoint.go b/pkg/kubelet/cm/memorymanager/state/state_checkpoint.go new file mode 100644 index 000000000000..fd3dabd6d54d --- /dev/null +++ b/pkg/kubelet/cm/memorymanager/state/state_checkpoint.go @@ -0,0 +1,187 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package state
+
+import (
+	"fmt"
+	"path"
+	"sync"
+
+	"k8s.io/klog/v2"
+	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
+	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
+	"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
+)
+
+var _ State = &stateCheckpoint{}
+
+type stateCheckpoint struct {
+	sync.RWMutex
+	cache             State
+	policyName        string
+	checkpointManager checkpointmanager.CheckpointManager
+	checkpointName    string
+	initialContainers containermap.ContainerMap
+}
+
+// NewCheckpointState creates new State for keeping track of memory/pod assignment with checkpoint backend
+func NewCheckpointState(stateDir, checkpointName, policyName string, initialContainers containermap.ContainerMap) (State, error) {
+	checkpointManager, err := checkpointmanager.NewCheckpointManager(stateDir)
+	if err != nil {
+		return nil, fmt.Errorf("failed to initialize checkpoint manager: %v", err)
+	}
+	stateCheckpoint := &stateCheckpoint{
+		cache:             NewMemoryState(),
+		policyName:        policyName,
+		checkpointManager: checkpointManager,
+		checkpointName:    checkpointName,
+		initialContainers: initialContainers,
+	}
+
+	if err := stateCheckpoint.restoreState(); err != nil {
+		//lint:ignore ST1005 user-facing error message
+		return nil, fmt.Errorf("could not restore state from checkpoint: %v, please drain this node and delete the memory manager checkpoint file %q before restarting Kubelet",
+			err, path.Join(stateDir, checkpointName))
+	}
+
+	return stateCheckpoint, nil
+}
+
+// restoreState restores state from a checkpoint and creates it if it doesn't exist
+func (sc *stateCheckpoint) restoreState() error {
+	sc.Lock()
+	defer sc.Unlock()
+	var err error
+
+	checkpoint := NewMemoryManagerCheckpoint()
+	if err = sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpoint); err != nil {
+		if err == errors.ErrCheckpointNotFound {
+			return sc.storeState()
+		}
+		return err
+	}
+
+	if sc.policyName != checkpoint.PolicyName {
+		return fmt.Errorf("[memorymanager] configured policy %q differs from state checkpoint policy %q", sc.policyName, checkpoint.PolicyName)
+	}
+
+	sc.cache.SetMachineState(checkpoint.MachineState)
+	sc.cache.SetMemoryAssignments(checkpoint.Entries)
+
+	klog.V(2).Info("[memorymanager] state checkpoint: restored state from checkpoint")
+
+	return nil
+}
+
+// storeState saves state to a checkpoint, the caller is responsible for locking
+func (sc *stateCheckpoint) storeState() error {
+	checkpoint := NewMemoryManagerCheckpoint()
+	checkpoint.PolicyName = sc.policyName
+	checkpoint.MachineState = sc.cache.GetMachineState()
+	checkpoint.Entries = sc.cache.GetMemoryAssignments()
+
+	err := sc.checkpointManager.CreateCheckpoint(sc.checkpointName, checkpoint)
+	if err != nil {
+		klog.Errorf("[memorymanager] could not save checkpoint: %v", err)
+		return err
+	}
+	return nil
+}
+
+// GetMachineState returns the machine state stored in the State
+func (sc *stateCheckpoint) GetMachineState() NodeMap {
+	sc.RLock()
+	defer sc.RUnlock()
+
+	return sc.cache.GetMachineState()
+}
+
+// GetMemoryBlocks returns memory assignments of a container
+func (sc *stateCheckpoint) GetMemoryBlocks(podUID string, containerName string) []Block {
+	sc.RLock()
+	defer sc.RUnlock()
+
+	return sc.cache.GetMemoryBlocks(podUID, containerName)
+}
+
+// GetMemoryAssignments returns ContainerMemoryAssignments
+func (sc *stateCheckpoint) GetMemoryAssignments() ContainerMemoryAssignments {
+	sc.RLock()
+	defer sc.RUnlock()
+
+	return sc.cache.GetMemoryAssignments()
+}
+
+// SetMachineState stores NodeMap in State
+func (sc *stateCheckpoint) 
SetMachineState(memoryMap NodeMap) {
+	sc.Lock()
+	defer sc.Unlock()
+
+	sc.cache.SetMachineState(memoryMap)
+	err := sc.storeState()
+	if err != nil {
+		klog.Warningf("store state to checkpoint error: %v", err)
+	}
+}
+
+// SetMemoryBlocks stores memory assignments of a container
+func (sc *stateCheckpoint) SetMemoryBlocks(podUID string, containerName string, blocks []Block) {
+	sc.Lock()
+	defer sc.Unlock()
+
+	sc.cache.SetMemoryBlocks(podUID, containerName, blocks)
+	err := sc.storeState()
+	if err != nil {
+		klog.Warningf("store state to checkpoint error: %v", err)
+	}
+}
+
+// SetMemoryAssignments sets ContainerMemoryAssignments by using the passed parameter
+func (sc *stateCheckpoint) SetMemoryAssignments(assignments ContainerMemoryAssignments) {
+	sc.Lock()
+	defer sc.Unlock()
+
+	sc.cache.SetMemoryAssignments(assignments)
+	err := sc.storeState()
+	if err != nil {
+		klog.Warningf("store state to checkpoint error: %v", err)
+	}
+}
+
+// Delete deletes corresponding Blocks from ContainerMemoryAssignments
+func (sc *stateCheckpoint) Delete(podUID string, containerName string) {
+	sc.Lock()
+	defer sc.Unlock()
+
+	sc.cache.Delete(podUID, containerName)
+	err := sc.storeState()
+	if err != nil {
+		klog.Warningf("store state to checkpoint error: %v", err)
+	}
+}
+
+// ClearState clears machineState and ContainerMemoryAssignments
+func (sc *stateCheckpoint) ClearState() {
+	sc.Lock()
+	defer sc.Unlock()
+
+	sc.cache.ClearState()
+	err := sc.storeState()
+	if err != nil {
+		klog.Warningf("store state to checkpoint error: %v", err)
+	}
+}
diff --git a/pkg/kubelet/cm/memorymanager/state/state_mem.go b/pkg/kubelet/cm/memorymanager/state/state_mem.go
new file mode 100644
index 000000000000..a84dabcf27a1
--- /dev/null
+++ b/pkg/kubelet/cm/memorymanager/state/state_mem.go
@@ -0,0 +1,123 @@
+/*
+Copyright 2020 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package state
+
+import (
+	"sync"
+
+	"k8s.io/klog/v2"
+)
+
+type stateMemory struct {
+	sync.RWMutex
+	assignments  ContainerMemoryAssignments
+	machineState NodeMap
+}
+
+var _ State = &stateMemory{}
+
+// NewMemoryState creates new State for keeping track of memory/pod assignment
+func NewMemoryState() State {
+	klog.Infof("[memorymanager] initializing new in-memory state store")
+	return &stateMemory{
+		assignments:  ContainerMemoryAssignments{},
+		machineState: NodeMap{},
+	}
+}
+
+// GetMachineState returns the machine state stored in the State
+func (s *stateMemory) GetMachineState() NodeMap {
+	s.RLock()
+	defer s.RUnlock()
+
+	return s.machineState.Clone()
+}
+
+// GetMemoryBlocks returns memory assignments of a container
+func (s *stateMemory) GetMemoryBlocks(podUID string, containerName string) []Block {
+	s.RLock()
+	defer s.RUnlock()
+
+	if res, ok := s.assignments[podUID][containerName]; ok {
+		return append([]Block{}, res...)
+	}
+	return nil
+}
+
+// GetMemoryAssignments returns ContainerMemoryAssignments
+func (s *stateMemory) GetMemoryAssignments() ContainerMemoryAssignments {
+	s.RLock()
+	defer s.RUnlock()
+
+	return s.assignments.Clone()
+}
+
+// SetMachineState stores NodeMap in State
+func (s *stateMemory) SetMachineState(nodeMap NodeMap) {
+	s.Lock()
+	defer s.Unlock()
+
+	s.machineState = nodeMap.Clone()
+	klog.Info("[memorymanager] updated machine memory state")
+}
+
+// SetMemoryBlocks stores memory assignments of a container
+func (s *stateMemory) SetMemoryBlocks(podUID string, containerName string, blocks []Block) {
+	s.Lock()
+	defer s.Unlock()
+
+	if _, ok := s.assignments[podUID]; !ok {
+		s.assignments[podUID] = map[string][]Block{}
+	}
+
+	s.assignments[podUID][containerName] = append([]Block{}, blocks...)
+	klog.Infof("[memorymanager] updated memory state (pod: %s, container: %s)", podUID, containerName)
+}
+
+// SetMemoryAssignments sets ContainerMemoryAssignments by using the passed parameter
+func (s *stateMemory) SetMemoryAssignments(assignments ContainerMemoryAssignments) {
+	s.Lock()
+	defer s.Unlock()
+
+	s.assignments = assignments.Clone()
+}
+
+// Delete deletes corresponding Blocks from ContainerMemoryAssignments
+func (s *stateMemory) Delete(podUID string, containerName string) {
+	s.Lock()
+	defer s.Unlock()
+
+	if _, ok := s.assignments[podUID]; !ok {
+		return
+	}
+
+	delete(s.assignments[podUID], containerName)
+	if len(s.assignments[podUID]) == 0 {
+		delete(s.assignments, podUID)
+	}
+	klog.V(2).Infof("[memorymanager] deleted memory assignment (pod: %s, container: %s)", podUID, containerName)
+}
+
+// ClearState clears machineState and ContainerMemoryAssignments
+func (s *stateMemory) ClearState() {
+	s.Lock()
+	defer s.Unlock()
+
+	s.machineState = NodeMap{}
+	s.assignments = make(ContainerMemoryAssignments)
+	klog.V(2).Infof("[memorymanager] cleared state")
+}

From 86df524948b4fe41a9161eb44b3d6fdb5f3ef224 Mon Sep 17 00:00:00 2001
From: Artyom Lukianov
Date: Thu, 8 Oct 2020 17:05:12 +0300
Subject: [PATCH 03/31] memory manager: provide unit tests for the state package

The commit includes tests to verify the functionality:
- to restore state from the file
- to store the state to the file
- to clean the state from old data

Signed-off-by: Artyom Lukianov
---
 .../state/state_checkpoint_test.go            | 390 ++++++++++++++++++
 1 file changed, 390 insertions(+)
 create mode 100644 pkg/kubelet/cm/memorymanager/state/state_checkpoint_test.go

diff --git a/pkg/kubelet/cm/memorymanager/state/state_checkpoint_test.go b/pkg/kubelet/cm/memorymanager/state/state_checkpoint_test.go
new file mode 100644
index 000000000000..f0a762026599
--- /dev/null
+++ b/pkg/kubelet/cm/memorymanager/state/state_checkpoint_test.go
@@ -0,0 +1,390 @@
+/*
+Copyright 2020 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +package state + +import ( + "io/ioutil" + "os" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + + v1 "k8s.io/api/core/v1" + "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" + "k8s.io/kubernetes/pkg/kubelet/cm/containermap" + testutil "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state/testing" +) + +const testingCheckpoint = "memorymanager_checkpoint_test" + +// assertStateEqual marks provided test as failed if provided states differ +func assertStateEqual(t *testing.T, restoredState, expectedState State) { + expectedMachineState := expectedState.GetMachineState() + restoredMachineState := restoredState.GetMachineState() + assert.Equal(t, expectedMachineState, restoredMachineState, "expected MachineState does not equal to restored one") + + expectedMemoryAssignments := expectedState.GetMemoryAssignments() + restoredMemoryAssignments := restoredState.GetMemoryAssignments() + assert.Equal(t, expectedMemoryAssignments, restoredMemoryAssignments, "state memory assignments mismatch") +} + +func TestCheckpointStateRestore(t *testing.T) { + testCases := []struct { + description string + checkpointContent string + initialContainers containermap.ContainerMap + expectedError string + expectedState *stateMemory + }{ + { + "Restore non-existing checkpoint", + "", + containermap.ContainerMap{}, + "", + &stateMemory{}, + }, + { + "Restore valid checkpoint", + `{ + "policyName":"static", + "machineState":{"0":{"numberOfAssignments":0,"memoryMap":{"memory":{"total":2048,"systemReserved":512,"allocatable":1536,"reserved":512,"free":1024}},"nodes":[]}}, + "entries":{"pod":{"container1":[{"numaAffinity":[0],"type":"memory","size":512}]}}, + "checksum": 163710462 + }`, + containermap.ContainerMap{}, + "", + &stateMemory{ + assignments: ContainerMemoryAssignments{ + "pod": map[string][]Block{ + "container1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 512, + }, + }, + }, + }, + machineState: NodeMap{ + 0: &NodeState{ + MemoryMap: map[v1.ResourceName]*MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536, + Free: 1024, + Reserved: 512, + SystemReserved: 512, + TotalMemSize: 2048, + }, + }, + }, + }, + }, + }, + { + "Restore checkpoint with invalid checksum", + `{ + "policyName":"static", + "machineState":{"0":{"numberOfAssignments":0,"memoryMap":{"memory":{"total":2048,"systemReserved":512,"allocatable":1536,"reserved":512,"free":1024}},"nodes":[]}}, + "entries":{"pod":{"container1":[{"affinity":[0],"type":"memory","size":512}]}}, + "checksum": 101010 + }`, + containermap.ContainerMap{}, + "checkpoint is corrupted", + &stateMemory{}, + }, + { + "Restore checkpoint with invalid JSON", + `{`, + containermap.ContainerMap{}, + "unexpected end of JSON input", + &stateMemory{}, + }, + } + + // create temp dir + testingDir, err := ioutil.TempDir("", "memorymanager_state_test") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(testingDir) + + // create checkpoint manager for testing + cpm, err := checkpointmanager.NewCheckpointManager(testingDir) + assert.NoError(t, err, "could not create testing checkpoint manager") + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + // ensure there is no previous checkpoint + assert.NoError(t, cpm.RemoveCheckpoint(testingCheckpoint), "could not remove testing checkpoint") + + // prepare checkpoint for testing + if strings.TrimSpace(tc.checkpointContent) != "" { + checkpoint := &testutil.MockCheckpoint{Content: tc.checkpointContent} + assert.NoError(t, cpm.CreateCheckpoint(testingCheckpoint, 
checkpoint), "could not create testing checkpoint") + } + + restoredState, err := NewCheckpointState(testingDir, testingCheckpoint, "static", tc.initialContainers) + if strings.TrimSpace(tc.expectedError) != "" { + assert.Error(t, err) + assert.Contains(t, err.Error(), "could not restore state from checkpoint: "+tc.expectedError) + } else { + assert.NoError(t, err, "unexpected error while creating checkpointState") + // compare state after restoration with the one expected + assertStateEqual(t, restoredState, tc.expectedState) + } + }) + } +} + +func TestCheckpointStateStore(t *testing.T) { + expectedState := &stateMemory{ + assignments: ContainerMemoryAssignments{ + "pod": map[string][]Block{ + "container1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 1024, + }, + }, + }, + }, + machineState: NodeMap{ + 0: &NodeState{ + MemoryMap: map[v1.ResourceName]*MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536, + Free: 512, + Reserved: 1024, + SystemReserved: 512, + TotalMemSize: 2048, + }, + }, + }, + }, + } + + // create temp dir + testingDir, err := ioutil.TempDir("", "memorymanager_state_test") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(testingDir) + + cpm, err := checkpointmanager.NewCheckpointManager(testingDir) + assert.NoError(t, err, "could not create testing checkpoint manager") + + assert.NoError(t, cpm.RemoveCheckpoint(testingCheckpoint), "could not remove testing checkpoint") + + cs1, err := NewCheckpointState(testingDir, testingCheckpoint, "static", nil) + assert.NoError(t, err, "could not create testing checkpointState instance") + + // set values of cs1 instance so they are stored in checkpoint and can be read by cs2 + cs1.SetMachineState(expectedState.machineState) + cs1.SetMemoryAssignments(expectedState.assignments) + + // restore checkpoint with previously stored values + cs2, err := NewCheckpointState(testingDir, testingCheckpoint, "static", nil) + assert.NoError(t, err, "could not create testing checkpointState instance") + + assertStateEqual(t, cs2, expectedState) +} + +func TestCheckpointStateHelpers(t *testing.T) { + testCases := []struct { + description string + machineState NodeMap + assignments ContainerMemoryAssignments + }{ + { + description: "One container", + assignments: ContainerMemoryAssignments{ + "pod": map[string][]Block{ + "container1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 1024, + }, + }, + }, + }, + machineState: NodeMap{ + 0: &NodeState{ + MemoryMap: map[v1.ResourceName]*MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536, + Free: 512, + Reserved: 1024, + SystemReserved: 512, + TotalMemSize: 2048, + }, + }, + Nodes: []int{}, + }, + }, + }, + { + description: "Two containers", + assignments: ContainerMemoryAssignments{ + "pod": map[string][]Block{ + "container1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 512, + }, + }, + "container2": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 512, + }, + }, + }, + }, + machineState: NodeMap{ + 0: &NodeState{ + MemoryMap: map[v1.ResourceName]*MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536, + Free: 512, + Reserved: 1024, + SystemReserved: 512, + TotalMemSize: 2048, + }, + }, + Nodes: []int{}, + }, + }, + }, + { + description: "Container without assigned memory", + assignments: ContainerMemoryAssignments{ + "pod": map[string][]Block{ + "container1": {}, + }, + }, + machineState: NodeMap{ + 0: &NodeState{ + MemoryMap: map[v1.ResourceName]*MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536, 
+ Free: 1536, + Reserved: 0, + SystemReserved: 512, + TotalMemSize: 2048, + }, + }, + Nodes: []int{}, + }, + }, + }, + } + + // create temp dir + testingDir, err := ioutil.TempDir("", "memorymanager_state_test") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(testingDir) + + cpm, err := checkpointmanager.NewCheckpointManager(testingDir) + assert.NoError(t, err, "could not create testing checkpoint manager") + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + // ensure there is no previous checkpoint + assert.NoError(t, cpm.RemoveCheckpoint(testingCheckpoint), "could not remove testing checkpoint") + + state, err := NewCheckpointState(testingDir, testingCheckpoint, "static", nil) + assert.NoError(t, err, "could not create testing checkpoint manager") + + state.SetMachineState(tc.machineState) + assert.Equal(t, tc.machineState, state.GetMachineState(), "machine state inconsistent") + + for pod := range tc.assignments { + for container, blocks := range tc.assignments[pod] { + state.SetMemoryBlocks(pod, container, blocks) + assert.Equal(t, blocks, state.GetMemoryBlocks(pod, container), "memory block inconsistent") + + state.Delete(pod, container) + assert.Nil(t, state.GetMemoryBlocks(pod, container), "deleted container still existing in state") + } + } + }) + } +} + +func TestCheckpointStateClear(t *testing.T) { + testCases := []struct { + description string + machineState NodeMap + assignments ContainerMemoryAssignments + }{ + { + description: "Valid state cleaning", + assignments: ContainerMemoryAssignments{ + "pod": map[string][]Block{ + "container1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 1024, + }, + }, + }, + }, + machineState: NodeMap{ + 0: &NodeState{ + MemoryMap: map[v1.ResourceName]*MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536, + Free: 512, + Reserved: 1024, + SystemReserved: 512, + TotalMemSize: 2048, + }, + }, + }, + }, + }, + } + + // create temp dir + testingDir, err := ioutil.TempDir("", "memorymanager_state_test") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(testingDir) + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + state, err := NewCheckpointState(testingDir, testingCheckpoint, "static", nil) + assert.NoError(t, err, "could not create testing checkpoint manager") + + state.SetMachineState(tc.machineState) + state.SetMemoryAssignments(tc.assignments) + + state.ClearState() + assert.Equal(t, NodeMap{}, state.GetMachineState(), "cleared state with non-empty machine state") + assert.Equal(t, ContainerMemoryAssignments{}, state.GetMemoryAssignments(), "cleared state with non-empty memory assignments") + }) + } +} From d0caec90e2a908e30547c986d44035c8584736a2 Mon Sep 17 00:00:00 2001 From: Artyom Lukianov Date: Thu, 8 Oct 2020 17:10:26 +0300 Subject: [PATCH 04/31] memory manager: add the policy interface The commit also adds two policy skeletons: - none - static Signed-off-by: Artyom Lukianov --- pkg/kubelet/cm/memorymanager/policy.go | 40 +++++++++++ pkg/kubelet/cm/memorymanager/policy_none.go | 61 ++++++++++++++++ pkg/kubelet/cm/memorymanager/policy_static.go | 71 +++++++++++++++++++ 3 files changed, 172 insertions(+) create mode 100644 pkg/kubelet/cm/memorymanager/policy.go create mode 100644 pkg/kubelet/cm/memorymanager/policy_none.go create mode 100644 pkg/kubelet/cm/memorymanager/policy_static.go diff --git a/pkg/kubelet/cm/memorymanager/policy.go b/pkg/kubelet/cm/memorymanager/policy.go new file mode 100644 index 000000000000..dea23b335e35 --- 
/dev/null
+++ b/pkg/kubelet/cm/memorymanager/policy.go
@@ -0,0 +1,40 @@
+/*
+Copyright 2020 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package memorymanager
+
+import (
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state"
+	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
+)
+
+// policyType defines the policy type
+type policyType string
+
+// Policy implements the logic for assigning memory to pod containers.
+type Policy interface {
+	Name() string
+	Start(s state.State) error
+	// Allocate call is idempotent
+	Allocate(s state.State, pod *v1.Pod, container *v1.Container) error
+	// RemoveContainer call is idempotent
+	RemoveContainer(s state.State, podUID string, containerName string) error
+	// GetTopologyHints implements the topologymanager.HintProvider Interface
+	// and is consulted to achieve NUMA aware resource alignment among this
+	// and other resource controllers.
+	GetTopologyHints(s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint
+}
diff --git a/pkg/kubelet/cm/memorymanager/policy_none.go b/pkg/kubelet/cm/memorymanager/policy_none.go
new file mode 100644
index 000000000000..e91c3ce3439d
--- /dev/null
+++ b/pkg/kubelet/cm/memorymanager/policy_none.go
@@ -0,0 +1,61 @@
+/*
+Copyright 2020 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package memorymanager
+
+import (
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state"
+	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
+)
+
+const policyTypeNone policyType = "none"
+
+// none is the implementation of the policy interface for the none policy;
+// using the none policy is the same as disabling memory management
+type none struct{}
+
+var _ Policy = &none{}
+
+// NewPolicyNone returns new none policy instance
+func NewPolicyNone() Policy {
+	return &none{}
+}
+
+func (p *none) Name() string {
+	return string(policyTypeNone)
+}
+
+func (p *none) Start(s state.State) error {
+	return nil
+}
+
+// Allocate call is idempotent
+func (p *none) Allocate(s state.State, pod *v1.Pod, container *v1.Container) error {
+	return nil
+}
+
+// RemoveContainer call is idempotent
+func (p *none) RemoveContainer(s state.State, podUID string, containerName string) error {
+	return nil
+}
+
+// GetTopologyHints implements the topologymanager.HintProvider Interface
+// and is consulted to achieve NUMA aware resource alignment among this
+// and other resource controllers.
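+// The none policy deliberately returns nil hints, which places no NUMA constraints on the topology manager.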
+func (p *none) GetTopologyHints(s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
+	return nil
+}
diff --git a/pkg/kubelet/cm/memorymanager/policy_static.go b/pkg/kubelet/cm/memorymanager/policy_static.go
new file mode 100644
index 000000000000..7a053880d6e3
--- /dev/null
+++ b/pkg/kubelet/cm/memorymanager/policy_static.go
@@ -0,0 +1,71 @@
+/*
+Copyright 2020 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package memorymanager
+
+import (
+	cadvisorapi "github.com/google/cadvisor/info/v1"
+
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state"
+	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
+)
+
+const policyTypeStatic policyType = "static"
+
+type systemReservedMemory map[int]map[v1.ResourceName]uint64
+
+// staticPolicy is the implementation of the policy interface for the static policy
+type staticPolicy struct {
+	// machineInfo contains machine memory related information
+	machineInfo *cadvisorapi.MachineInfo
+	// systemReserved contains memory reserved for system and kube components
+	systemReserved systemReservedMemory
+	// affinity is the topology manager reference used to get container topology affinity
+	affinity topologymanager.Store
+}
+
+var _ Policy = &staticPolicy{}
+
+// NewPolicyStatic returns new static policy instance
+func NewPolicyStatic(machineInfo *cadvisorapi.MachineInfo, reserved systemReservedMemory, affinity topologymanager.Store) (Policy, error) {
+	return &staticPolicy{machineInfo: machineInfo, systemReserved: reserved, affinity: affinity}, nil
+}
+
+func (p *staticPolicy) Name() string {
+	return string(policyTypeStatic)
+}
+
+func (p *staticPolicy) Start(s state.State) error {
+	return nil
+}
+
+// Allocate call is idempotent
+func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) error {
+	return nil
+}
+
+// RemoveContainer call is idempotent
+func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerName string) error {
+	return nil
+}
+
+// GetTopologyHints implements the topologymanager.HintProvider Interface
+// and is consulted to achieve NUMA aware resource alignment among this
+// and other resource controllers.
+func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
+	return nil
+}

From 95f81372e2eeece8dc3e32d15a6c57ffe254af90 Mon Sep 17 00:00:00 2001
From: Artyom Lukianov
Date: Thu, 19 Mar 2020 12:05:18 +0200
Subject: [PATCH 05/31] memory manager: implement the manager interface methods

The commit adds implementation for methods:
- Start
- AddContainer
- Allocate
- RemoveContainer
- State
- GetTopologyHints

Signed-off-by: Artyom Lukianov
---
 .../cm/memorymanager/memory_manager.go        | 215 ++++++++++++++++--
 1 file changed, 197 insertions(+), 18 deletions(-)

diff --git a/pkg/kubelet/cm/memorymanager/memory_manager.go b/pkg/kubelet/cm/memorymanager/memory_manager.go
index 1c2f81d32f5b..b242f483e22b 100644
--- a/pkg/kubelet/cm/memorymanager/memory_manager.go
+++ b/pkg/kubelet/cm/memorymanager/memory_manager.go
@@ -17,13 +17,16 @@ limitations under the License.
package memorymanager
 
 import (
+	"fmt"
+	"strconv"
+	"strings"
 	"sync"
-	"time"
 
 	cadvisorapi "github.com/google/cadvisor/info/v1"
 
 	v1 "k8s.io/api/core/v1"
 	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+	"k8s.io/klog/v2"
 	"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
 	"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state"
 	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
@@ -34,7 +37,7 @@ import (
 // memoryManagerStateFileName is the file name where memory manager stores its state
 const memoryManagerStateFileName = "memory_manager_state"
 
-// ActivePodsFunc is a function that returns a list of pods to reconcile.
+// ActivePodsFunc is a function that returns a list of active pods.
 type ActivePodsFunc func() []*v1.Pod
 
 type runtimeService interface {
@@ -61,8 +64,7 @@ type Manager interface {
 	Allocate(pod *v1.Pod, container *v1.Container) error
 
 	// RemoveContainer is called after Kubelet decides to kill or delete a
-	// container. After this call, the memory manager stops trying to reconcile
-	// that container, and any memory allocated to the container is freed.
+	// container. After this call, any memory allocated to the container is freed.
 	RemoveContainer(containerID string) error
 
 	// State returns a read-only interface to the internal memory manager state.
@@ -72,11 +74,6 @@ type Manager interface {
 	// and is consulted to achieve NUMA aware resource alignment among this
 	// and other resource controllers.
 	GetTopologyHints(*v1.Pod, *v1.Container) map[string][]topologymanager.TopologyHint
-
-	// GetPodTopologyHints implements the topologymanager.HintProvider Interface
-	// and is consulted to achieve NUMA aware resource alignment among this
-	// and other resource controllers.
-	GetPodTopologyHints(*v1.Pod) map[string][]topologymanager.TopologyHint
 }
 
 type manager struct {
@@ -116,28 +113,132 @@ type manager struct {
 var _ Manager = &manager{}
 
 // NewManager returns new instance of the memory manager
-func NewManager(reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store) (Manager, error) {
-
+func NewManager(policyName string, machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store) (Manager, error) {
+	var policy Policy
+
+	switch policyType(policyName) {
+
+	case policyTypeNone:
+		policy = NewPolicyNone()
+
+	case policyTypeStatic:
+		reserved, err := getReservedMemory(machineInfo, nodeAllocatableReservation)
+		if err != nil {
+			return nil, err
+		}
+		policy, err = NewPolicyStatic(machineInfo, reserved, affinity)
+		if err != nil {
+			return nil, err
+		}
+
+	default:
+		return nil, fmt.Errorf("unknown policy: \"%s\"", policyName)
+	}
+
+	manager := &manager{
+		policy:                     policy,
+		nodeAllocatableReservation: nodeAllocatableReservation,
+		stateFileDirectory:         stateFileDirectory,
+	}
+	manager.sourcesReady = &sourcesReadyStub{}
+	return manager, nil
 }
 
-// Start starts the memory manager reconcile loop under the kubelet to keep state updated
+// Start starts the memory manager under the kubelet and calls policy start
 func (m *manager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService, initialContainers containermap.ContainerMap) error {
-
+	klog.Infof("[memorymanager] starting with %s policy", m.policy.Name())
+	m.sourcesReady = sourcesReady
+	m.activePods = activePods
+	m.podStatusProvider = podStatusProvider
+	
m.containerRuntime = containerRuntime
+	m.containerMap = initialContainers
+
+	stateImpl, err := state.NewCheckpointState(m.stateFileDirectory, memoryManagerStateFileName, m.policy.Name(), m.containerMap)
+	if err != nil {
+		klog.Errorf("[memorymanager] could not initialize checkpoint manager: %v, please drain node and remove policy state file", err)
+		return err
+	}
+	m.state = stateImpl
+
+	err = m.policy.Start(m.state)
+	if err != nil {
+		klog.Errorf("[memorymanager] policy start error: %v", err)
+		return err
+	}
+
+	return nil
 }
 
-// AddContainer saves the value of requested memory for the guaranteed pod under the state and sets the memory affinity according to the topology manager
-func (m *manager) AddContainer(p *v1.Pod, c *v1.Container, containerID string) error {
-
+// AddContainer saves the memory requested by the guaranteed pod to the state and sets the container memory affinity according to the topology manager
+func (m *manager) AddContainer(pod *v1.Pod, container *v1.Container, containerID string) error {
+	m.Lock()
+	m.containerMap.Add(string(pod.UID), container.Name, containerID)
+	m.Unlock()
+
+	// Get NUMA node affinity of blocks assigned to the container during Allocate()
+	var nodes []string
+	for _, block := range m.state.GetMemoryBlocks(string(pod.UID), container.Name) {
+		for _, nodeID := range block.NUMAAffinity {
+			nodes = append(nodes, strconv.Itoa(nodeID))
+		}
+	}
+
+	if len(nodes) < 1 {
+		klog.V(5).Infof("[memorymanager] update of container resources is skipped because no memory blocks are assigned")
+		return nil
+	}
+
+	affinity := strings.Join(nodes, ",")
+	klog.Infof("[memorymanager] Set container %q cpuset.mems to %q", containerID, affinity)
+	err := m.containerRuntime.UpdateContainerResources(containerID, &runtimeapi.LinuxContainerResources{CpusetMems: affinity})
+	if err != nil {
+		klog.Errorf("[memorymanager] AddContainer error: error updating cpuset.mems for container (pod: %s, container: %s, container id: %s, err: %v)", pod.Name, container.Name, containerID, err)
+
+		m.Lock()
+		err = m.policyRemoveContainerByRef(string(pod.UID), container.Name)
+		if err != nil {
+			klog.Errorf("[memorymanager] AddContainer rollback state error: %v", err)
+		}
+		m.Unlock()
+	}
+	return err
 }
 
 // Allocate is called to pre-allocate memory resources during Pod admission.
 // A returned error is intended to fail the pod admission, so no container of the pod starts without its memory reserved.
 func (m *manager) Allocate(pod *v1.Pod, container *v1.Container) error {
-
+	// Garbage collect any stranded resources before allocation
+	m.removeStaleState()
+
+	m.Lock()
+	defer m.Unlock()
+
+	// Call down into the policy to assign this container memory if required.
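+	// The none policy treats this as a no-op, while the static policy is expected to reserve memory blocks in the state.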
+	if err := m.policy.Allocate(m.state, pod, container); err != nil {
+		klog.Errorf("[memorymanager] Allocate error: %v", err)
+		return err
+	}
+	return nil
 }
 
 // RemoveContainer removes the container from the state
 func (m *manager) RemoveContainer(containerID string) error {
-
+	m.Lock()
+	defer m.Unlock()
+
+	// if an error occurs, the container entry no longer exists in the container map
+	podUID, containerName, err := m.containerMap.GetContainerRef(containerID)
+	if err != nil {
+		klog.Warningf("[memorymanager] Failed to get container %s from container map error: %v", containerID, err)
+		return nil
+	}
+
+	err = m.policyRemoveContainerByRef(podUID, containerName)
+	if err != nil {
+		klog.Errorf("[memorymanager] RemoveContainer error: %v", err)
+		return err
+	}
+
+	return nil
 }
 
 // State returns the state of the manager
@@ -147,5 +248,83 @@ func (m *manager) State() state.Reader {
 	return m.state
 }
 
 // GetTopologyHints returns the topology hints for the topology manager
 func (m *manager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
+	// Garbage collect any stranded resources before providing TopologyHints
+	m.removeStaleState()
+	// Delegate to active policy
+	return m.policy.GetTopologyHints(m.state, pod, container)
+}
+
+// TODO: consider moving this method to the manager interface; the only difference from the CPU manager is the assignments, which we could pass to the method
+func (m *manager) removeStaleState() {
+	// Only once all sources are ready do we attempt to remove any stale state.
+	// This ensures that the call to `m.activePods()` below will succeed with
+	// the actual active pods list.
+	if !m.sourcesReady.AllReady() {
+		return
+	}
+
+	// We grab the lock to ensure that no new containers will grab memory blocks while
+	// executing the code below. Without this lock, it's possible that we end up
+	// removing state that is newly added by an asynchronous call to
+	// AddContainer() during the execution of this code.
+	m.Lock()
+	defer m.Unlock()
+
+	// Get the list of active pods.
+	activePods := m.activePods()
+
+	// Build a list of (podUID, containerName) pairs for all containers in all active Pods.
+	activeContainers := make(map[string]map[string]struct{})
+	for _, pod := range activePods {
+		activeContainers[string(pod.UID)] = make(map[string]struct{})
+		for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) {
+			activeContainers[string(pod.UID)][container.Name] = struct{}{}
+		}
+	}
+
+	// Loop through the MemoryManager state. Remove any state for containers not
+	// in the `activeContainers` list built above.
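+	// This keeps the checkpointed state in sync with the set of pods that are actually running on the node.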
+	assignments := m.state.GetMemoryAssignments()
+	for podUID := range assignments {
+		for containerName := range assignments[podUID] {
+			if _, ok := activeContainers[podUID][containerName]; !ok {
+				klog.Infof("[memorymanager] removeStaleState: removing (pod %s, container: %s)", podUID, containerName)
+				err := m.policyRemoveContainerByRef(podUID, containerName)
+				if err != nil {
+					klog.Errorf("[memorymanager] removeStaleState: failed to remove (pod %s, container %s), error: %v", podUID, containerName, err)
+				}
+			}
+		}
+	}
+}
+
+func (m *manager) policyRemoveContainerByRef(podUID string, containerName string) error {
+	err := m.policy.RemoveContainer(m.state, podUID, containerName)
+	if err == nil {
+		m.containerMap.RemoveByContainerRef(podUID, containerName)
+	}
+
+	return err
+}
+
+func getReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList) (systemReservedMemory, error) {
+	// TODO: we should add a new kubelet parameter, and get the reserved memory per NUMA node from it
+	// currently we use kube-reserved + system-reserved + eviction reserve for each NUMA node, which creates memory over-consumption
+	// and leaves no reservation for huge pages
+	reserved := systemReservedMemory{}
+	for _, node := range machineInfo.Topology {
+		memory := nodeAllocatableReservation[v1.ResourceMemory]
+		if memory.IsZero() {
+			break
+		}
+		value, succeeded := memory.AsInt64()
+		if !succeeded {
+			return nil, fmt.Errorf("failed to represent reserved memory as int64")
+		}
+
+		reserved[node.Id] = map[v1.ResourceName]uint64{
+			v1.ResourceMemory: uint64(value),
+		}
+	}
+	return reserved, nil
 }

From b95d45e803c151f82c7195105635b7d5559803c9 Mon Sep 17 00:00:00 2001
From: Artyom Lukianov
Date: Thu, 8 Oct 2020 18:23:52 +0300
Subject: [PATCH 06/31] memory manager: add new flag type
 BracketSeparatedSliceMapStringString

Add BracketSeparatedSliceMapStringString to parse config like the one below:
{numa-node=0,type=memory,limit=1Gi},{numa-node=1,type=memory,limit=1Gi}

Signed-off-by: Byonggon Chun
---
 ...acket_separated_slice_map_string_string.go | 118 ++++++++++++
 ..._separated_slice_map_string_string_test.go | 178 ++++++++++++++++++
 2 files changed, 296 insertions(+)
 create mode 100644 staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string.go
 create mode 100644 staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string_test.go

diff --git a/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string.go b/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string.go
new file mode 100644
index 000000000000..c21780d01b28
--- /dev/null
+++ b/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string.go
@@ -0,0 +1,118 @@
+/*
+Copyright 2020 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +package flag + +import ( + "fmt" + "sort" + "strings" +) + +// BracketSeparatedSliceMapStringString can be set from the command line with the format `--flag {key=value, ...}, {...}`. +// Multiple comma-separated key-value pairs in a braket(`{}`) in a single invocation are supported. For example: `--flag {key=value, key=value, ...}`. +// Multiple braket-separated list of key-value pairs in a single invocation are supported. For example: `--flag {key=value, key=value}, {key=value, key=value}`. +type BracketSeparatedSliceMapStringString struct { + Value *[]map[string]string + initialized bool // set to true after the first Set call +} + +// NewBracketSeparatedSliceMapStringString takes a pointer to a []map[string]string and returns the +// BracketSeparatedSliceMapStringString flag parsing shim for that map +func NewBracketSeparatedSliceMapStringString(m *[]map[string]string) *BracketSeparatedSliceMapStringString { + return &BracketSeparatedSliceMapStringString{Value: m} +} + + +// Set implements github.com/spf13/pflag.Value +func (m *BracketSeparatedSliceMapStringString) Set(value string) error { + if m.Value == nil { + return fmt.Errorf("no target (nil pointer to []map[string]string)") + } + if !m.initialized || *m.Value == nil { + *m.Value = make([]map[string]string, 0) + m.initialized = true + } + + value = strings.TrimSpace(value) + + // split here + //{numa-node=0,memory-type=memory,limit=1Gi},{numa-node=1,memory-type=memory,limit=1Gi},{numa-node=1,memory-type=memory,limit=1Gi} +// for _, split := range strings.Split(value, "{") { +// split = strings.TrimRight(split, ",") +// split = strings.TrimRight(split, "}") + for _, split := range strings.Split(value, ",{") { + //split = strings.TrimRight(split, ",") + split = strings.TrimLeft(split, "{") + split = strings.TrimRight(split, "}") + + if len(split) == 0 { + continue + } + + // now we have "numa-node=1,memory-type=memory,limit=1Gi" + tmpRawMap := make(map[string]string) + + tmpMap:= NewMapStringString(&tmpRawMap) + + if err := tmpMap.Set(split); err != nil { + return fmt.Errorf("could not parse String: (%s): %v", value, err) + } + + *m.Value = append(*m.Value, tmpRawMap) + } + + return nil +} + +// String implements github.com/spf13/pflag.Value +func (m *BracketSeparatedSliceMapStringString) String() string { + if m == nil || m.Value == nil { + return "" + } + + var slices []string + + for _, configMap := range *m.Value { + var tmpPairs []string + + var keys []string + for key := range configMap { + keys = append(keys, key) + } + sort.Strings(keys) + + for _, key := range keys { + tmpPairs = append(tmpPairs, fmt.Sprintf("%s=%s", key, configMap[key])) + } + + if len(tmpPairs) != 0 { + slices = append(slices, "{" + strings.Join(tmpPairs, ",") + "}") + } + } + sort.Strings(slices) + return strings.Join(slices, ",") +} + +// Type implements github.com/spf13/pflag.Value +func (*BracketSeparatedSliceMapStringString) Type() string { + return "BracketSeparatedSliceMapStringString" +} + +// Empty implements OmitEmpty +func (m *BracketSeparatedSliceMapStringString) Empty() bool { + return !m.initialized || m.Value == nil || len(*m.Value) == 0 +} diff --git a/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string_test.go b/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string_test.go new file mode 100644 index 000000000000..caea52c87618 --- /dev/null +++ b/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string_test.go @@ -0,0 +1,178 @@ +/* +Copyright 
2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package flag + +import ( + "reflect" + "testing" +) + +func TestStringBracketSeparatedSliceMapStringString(t *testing.T) { + var nilSliceMap []map[string]string + testCases := []struct { + desc string + m *BracketSeparatedSliceMapStringString + expect string + }{ + {"nill", NewBracketSeparatedSliceMapStringString(&nilSliceMap), ""}, + {"empty", NewBracketSeparatedSliceMapStringString(&[]map[string]string{}), ""}, + {"one key", NewBracketSeparatedSliceMapStringString(&[]map[string]string{{"a": "string"}}), "{a=string}"}, + {"two keys", NewBracketSeparatedSliceMapStringString(&[]map[string]string{{"a": "string", "b": "string"}}), "{a=string,b=string}"}, + } + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + str := tc.m.String() + if tc.expect != str { + t.Fatalf("expect %q but got %q", tc.expect, str) + } + }) + } +} + +func TestSetBracketSeparatedSliceMapStringString(t *testing.T) { + var nilMap []map[string]string + testCases := []struct { + desc string + vals []string + start *BracketSeparatedSliceMapStringString + expect *BracketSeparatedSliceMapStringString + err string + }{ + // we initialize the map with a default key that should be cleared by Set + {"clears defaults", []string{""}, + NewBracketSeparatedSliceMapStringString(&[]map[string]string{{"default": ""}}), + &BracketSeparatedSliceMapStringString{ + initialized: true, + Value: &[]map[string]string{}, + }, ""}, + // make sure we still allocate for "initialized" multimaps where Multimap was initially set to a nil map + {"allocates map if currently nil", []string{""}, + &BracketSeparatedSliceMapStringString{initialized: true, Value: &nilMap}, + &BracketSeparatedSliceMapStringString{ + initialized: true, + Value: &[]map[string]string{}, + }, ""}, + // for most cases, we just reuse nilMap, which should be allocated by Set, and is reset before each test case + {"empty", []string{""}, + NewBracketSeparatedSliceMapStringString(&nilMap), + &BracketSeparatedSliceMapStringString{ + initialized: true, + Value: &[]map[string]string{}, + }, ""}, + {"empty braket", []string{"{}"}, + NewBracketSeparatedSliceMapStringString(&nilMap), + &BracketSeparatedSliceMapStringString{ + initialized: true, + Value: &[]map[string]string{}, + }, ""}, + {"missing braket", []string{"a=string, b=string"}, + NewBracketSeparatedSliceMapStringString(&nilMap), + &BracketSeparatedSliceMapStringString{ + initialized: true, + Value: &[]map[string]string{{"a": "string", "b": "string"}}, + }, ""}, + {"empty key", []string{"{=string}"}, + NewBracketSeparatedSliceMapStringString(&nilMap), + &BracketSeparatedSliceMapStringString{ + initialized: true, + Value: &[]map[string]string{{"": "string"}}, + }, ""}, + {"one key", []string{"{a=string}"}, + NewBracketSeparatedSliceMapStringString(&nilMap), + &BracketSeparatedSliceMapStringString{ + initialized: true, + Value: &[]map[string]string{{"a": "string"}}, + }, ""}, + {"two keys", []string{"{a=string,b=string}"}, + 
NewBracketSeparatedSliceMapStringString(&nilMap), + &BracketSeparatedSliceMapStringString{ + initialized: true, + Value: &[]map[string]string{{"a": "string", "b": "string"}}, + }, ""}, + {"two duplecated keys", []string{"{a=string,a=string}"}, + NewBracketSeparatedSliceMapStringString(&nilMap), + &BracketSeparatedSliceMapStringString{ + initialized: true, + Value: &[]map[string]string{{"a": "string"}}, + }, ""}, + {"two keys with space", []string{"{a = string, b = string}"}, + NewBracketSeparatedSliceMapStringString(&nilMap), + &BracketSeparatedSliceMapStringString{ + initialized: true, + Value: &[]map[string]string{{"a": "string", "b": "string"}}, + }, ""}, + {"two keys, multiple Set invocations", []string{"{a=string, b=string}", "{a=string, b=string}"}, + NewBracketSeparatedSliceMapStringString(&nilMap), + &BracketSeparatedSliceMapStringString{ + initialized: true, + Value: &[]map[string]string{{"a": "string", "b": "string"}, {"a": "string", "b": "string"}}, + }, ""}, + {"no target", []string{""}, + NewBracketSeparatedSliceMapStringString(nil), + nil, + "no target (nil pointer to []map[string]string)"}, + } + for _, tc := range testCases { + nilMap = nil + t.Run(tc.desc, func(t *testing.T) { + var err error + for _, val := range tc.vals { + err = tc.start.Set(val) + if err != nil { + break + } + } + if tc.err != "" { + if err == nil || err.Error() != tc.err { + t.Fatalf("expect error %s but got %v", tc.err, err) + } + return + } else if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !reflect.DeepEqual(tc.expect, tc.start) { + t.Fatalf("expect %#v but got %#v", tc.expect, tc.start) + } + }) + } +} + +func TestEmptyBracketSeparatedSliceMapStringString(t *testing.T) { + var nilSliceMap []map[string]string + notEmpty := &BracketSeparatedSliceMapStringString{ + Value: &[]map[string]string{{"a": "int", "b": "string", "c": "string"}}, + initialized: true, + } + + testCases := []struct { + desc string + m *BracketSeparatedSliceMapStringString + expect bool + }{ + {"nil", NewBracketSeparatedSliceMapStringString(&nilSliceMap), true}, + {"empty", NewBracketSeparatedSliceMapStringString(&[]map[string]string{}), true}, + {"populated", notEmpty, false}, + } + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + ret := tc.m.Empty() + if ret != tc.expect { + t.Fatalf("expect %t but got %t", tc.expect, ret) + } + }) + } +} From 93accb51e4c24bc36b730812a44381b7780ca918 Mon Sep 17 00:00:00 2001 From: Artyom Lukianov Date: Thu, 8 Oct 2020 18:37:36 +0300 Subject: [PATCH 07/31] memory manager: add memory manager flag under kubelet options and kubelet config The commit also includes generated files after `make generated_files`. 
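For illustration (the values here are only an example), a flag value such as

  --reserved-memory "{numa-node=0, type=memory, limit=1Gi}, {numa-node=1, type=memory, limit=1Gi}"

is parsed by the new parseReservedMemoryConfig() helper into

  map[int]map[v1.ResourceName]resource.Quantity{
      0: {v1.ResourceMemory: resource.MustParse("1Gi")},
      1: {v1.ResourceMemory: resource.MustParse("1Gi")},
  }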
Signed-off-by: Byonggon Chun
---
 api/api-rules/violation_exceptions.list       |  1 +
 cmd/kubelet/app/options/options.go            |  5 ++
 cmd/kubelet/app/server.go                     | 78 +++++++++++++++++--
 pkg/features/kube_features.go                 |  6 ++
 pkg/kubelet/apis/config/types.go              |  9 +++
 .../config/v1beta1/zz_generated.conversion.go |  4 +
 .../apis/config/zz_generated.deepcopy.go      | 13 ++++
 .../k8s.io/kubelet/config/v1beta1/types.go    | 22 ++++++
 .../config/v1beta1/zz_generated.deepcopy.go   | 13 ++++
 9 files changed, 143 insertions(+), 8 deletions(-)

diff --git a/api/api-rules/violation_exceptions.list b/api/api-rules/violation_exceptions.list
index 65aea40c25e3..665da4da15c8 100644
--- a/api/api-rules/violation_exceptions.list
+++ b/api/api-rules/violation_exceptions.list
@@ -392,6 +392,7 @@ API rule violation: list_type_missing,k8s.io/kubelet/config/v1alpha1,CredentialP
 API rule violation: list_type_missing,k8s.io/kubelet/config/v1beta1,KubeletConfiguration,AllowedUnsafeSysctls
 API rule violation: list_type_missing,k8s.io/kubelet/config/v1beta1,KubeletConfiguration,ClusterDNS
 API rule violation: list_type_missing,k8s.io/kubelet/config/v1beta1,KubeletConfiguration,EnforceNodeAllocatable
+API rule violation: list_type_missing,k8s.io/kubelet/config/v1beta1,KubeletConfiguration,ReservedMemory
 API rule violation: list_type_missing,k8s.io/kubelet/config/v1beta1,KubeletConfiguration,TLSCipherSuites
 API rule violation: list_type_missing,k8s.io/metrics/pkg/apis/metrics/v1alpha1,PodMetrics,Containers
 API rule violation: list_type_missing,k8s.io/metrics/pkg/apis/metrics/v1beta1,PodMetrics,Containers

diff --git a/cmd/kubelet/app/options/options.go b/cmd/kubelet/app/options/options.go
index 842386af3be3..471d083f5a63 100644
--- a/cmd/kubelet/app/options/options.go
+++ b/cmd/kubelet/app/options/options.go
@@ -550,4 +550,9 @@ Runtime log sanitization may introduce significant computation overhead and ther
 
 	// Graduated experimental flags, kept for backward compatibility
 	fs.BoolVar(&c.KernelMemcgNotification, "experimental-kernel-memcg-notification", c.KernelMemcgNotification, "Use kernelMemcgNotification configuration, this flag will be removed in 1.23.")
+
+	// Memory Manager Flags
+	fs.StringVar(&c.MemoryManagerPolicy, "memory-manager-policy", c.MemoryManagerPolicy, "Memory Manager policy to use. Possible values: 'none', 'static'. Default: 'none'")
+	// TODO: once the documentation link is available, replace the KEP link with the documentation one.
+	fs.Var(cliflag.NewBracketSeparatedSliceMapStringString(&c.ReservedMemory), "reserved-memory", "A comma-separated list of bracket-enclosed configurations for the memory manager (e.g. {numa-node=0, type=memory, limit=1Gi}, {numa-node=1, type=memory, limit=1Gi}). The total sum for each memory type should be equal to the sum of kube-reserved, system-reserved and eviction-threshold. See more details under https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/1769-memory-manager#reserved-memory-flag")
 }

diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go
index ff7e05feec12..127dfc84f8b6 100644
--- a/cmd/kubelet/app/server.go
+++ b/cmd/kubelet/app/server.go
@@ -71,6 +71,7 @@ import (
 	"k8s.io/kubernetes/cmd/kubelet/app/options"
 	"k8s.io/kubernetes/pkg/api/legacyscheme"
 	api "k8s.io/kubernetes/pkg/apis/core"
+	corev1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
 	"k8s.io/kubernetes/pkg/capabilities"
 	"k8s.io/kubernetes/pkg/credentialprovider"
 	"k8s.io/kubernetes/pkg/features"
@@ -687,6 +688,12 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend
 		s.SystemReserved["cpu"] = strconv.Itoa(reservedSystemCPUs.Size())
 		klog.Infof("After cpu setting is overwritten, KubeReserved=\"%v\", SystemReserved=\"%v\"", s.KubeReserved, s.SystemReserved)
 	}
+
+	reservedMemory, err := parseReservedMemoryConfig(s.ReservedMemory)
+	if err != nil {
+		return err
+	}
+
 	kubeReserved, err := parseResourceList(s.KubeReserved)
 	if err != nil {
 		return err
@@ -732,14 +739,16 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend
 			ReservedSystemCPUs:     reservedSystemCPUs,
 			HardEvictionThresholds: hardEvictionThresholds,
 		},
-		QOSReserved:                           *experimentalQOSReserved,
-		ExperimentalCPUManagerPolicy:          s.CPUManagerPolicy,
-		ExperimentalCPUManagerReconcilePeriod: s.CPUManagerReconcilePeriod.Duration,
-		ExperimentalPodPidsLimit:              s.PodPidsLimit,
-		EnforceCPULimits:                      s.CPUCFSQuota,
-		CPUCFSQuotaPeriod:                     s.CPUCFSQuotaPeriod.Duration,
-		ExperimentalTopologyManagerPolicy:     s.TopologyManagerPolicy,
-		ExperimentalTopologyManagerScope:      s.TopologyManagerScope,
+		QOSReserved:                             *experimentalQOSReserved,
+		ExperimentalCPUManagerPolicy:            s.CPUManagerPolicy,
+		ExperimentalCPUManagerReconcilePeriod:   s.CPUManagerReconcilePeriod.Duration,
+		ExperimentalMemoryManagerPolicy:         s.MemoryManagerPolicy,
+		ExperimentalMemoryManagerReservedMemory: reservedMemory,
+		ExperimentalPodPidsLimit:                s.PodPidsLimit,
+		EnforceCPULimits:                        s.CPUCFSQuota,
+		CPUCFSQuotaPeriod:                       s.CPUCFSQuotaPeriod.Duration,
+		ExperimentalTopologyManagerPolicy:       s.TopologyManagerPolicy,
+		ExperimentalTopologyManagerScope:        s.TopologyManagerScope,
 	},
 	s.FailSwapOn,
 	devicePluginEnabled,
@@ -1296,6 +1305,59 @@ func parseResourceList(m map[string]string) (v1.ResourceList, error) {
 	return rl, nil
 }
 
+func parseReservedMemoryConfig(config []map[string]string) (map[int]map[v1.ResourceName]resource.Quantity, error) {
+	if len(config) == 0 {
+		return nil, nil
+	}
+
+	const (
+		indexKey = "numa-node"
+		typeKey  = "type"
+		limitKey = "limit"
+	)
+
+	keys := []string{indexKey, typeKey, limitKey}
+
+	// check whether all keys are present
+	for _, m := range config {
+		for _, key := range keys {
+			if _, exist := m[key]; !exist {
+				return nil, fmt.Errorf("key: %s is missing in the given ReservedMemory flag: %v", key, config)
+			}
+		}
+	}
+
+	parsed := make(map[int]map[v1.ResourceName]resource.Quantity, len(config))
+	for _, m := range config {
+		idxInString := m[indexKey]
+		idx, err := strconv.Atoi(idxInString)
+		if err != nil || idx < 0 {
+			return nil, fmt.Errorf("NUMA index conversion error for value: \"%s\"", idxInString)
+		}
+
+		typeInString := m[typeKey]
+		v1Type := v1.ResourceName(typeInString)
+		if v1Type != v1.ResourceMemory && !corev1helper.IsHugePageResourceName(v1Type) {
+			return nil, fmt.Errorf("memory type conversion error, unknown type: \"%s\"", typeInString)
+		}
+		if corev1helper.IsHugePageResourceName(v1Type) {
+			if _, err := corev1helper.HugePageSizeFromResourceName(v1Type); err != nil {
+				return nil, fmt.Errorf("memory type conversion error, unknown type: \"%s\"", typeInString)
+			}
+		}
+
+		limitInString := m[limitKey]
+		limit, err := resource.ParseQuantity(limitInString)
+		if err != nil || limit.Sign() != 1 {
+			return nil, fmt.Errorf("memory limit conversion error for value \"%s\"", limitInString)
+		}
+		parsed[idx] = make(map[v1.ResourceName]resource.Quantity)
+		parsed[idx][v1Type] = limit
+	}
+
+	return parsed, nil
+}
+
 // BootstrapKubeletConfigController constructs and bootstrap a configuration controller
 func BootstrapKubeletConfigController(dynamicConfigDir string, transform dynamickubeletconfig.TransformFunc) (*kubeletconfiginternal.KubeletConfiguration, *dynamickubeletconfig.Controller, error) {
 	if !utilfeature.DefaultFeatureGate.Enabled(features.DynamicKubeletConfig) {

diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go
index 122d1d11975e..f1a47fbdb8c1 100644
--- a/pkg/features/kube_features.go
+++ b/pkg/features/kube_features.go
@@ -123,6 +123,12 @@ const (
 	// Enable resource managers to make NUMA aligned decisions
 	TopologyManager featuregate.Feature = "TopologyManager"
 
+	// owner: @cynepco3hahue(alukiano) @cezaryzukowski @k-wiatrzyk
+	// alpha: v1.20
+
+	// Allows setting container memory affinity according to NUMA topology
+	MemoryManager featuregate.Feature = "MemoryManager"
+
 	// owner: @sjenning
 	// beta: v1.11
 	//

diff --git a/pkg/kubelet/apis/config/types.go b/pkg/kubelet/apis/config/types.go
index d518a6cf4125..13a5fbbd280e 100644
--- a/pkg/kubelet/apis/config/types.go
+++ b/pkg/kubelet/apis/config/types.go
@@ -224,6 +224,9 @@ type KubeletConfiguration struct {
 	// CPU Manager reconciliation period.
 	// Requires the CPUManager feature gate to be enabled.
 	CPUManagerReconcilePeriod metav1.Duration
+	// MemoryManagerPolicy is the name of the policy to use.
+	// Requires the MemoryManager feature gate to be enabled.
+	MemoryManagerPolicy string
 	// TopologyManagerPolicy is the name of the policy to use.
 	// Policies other than "none" require the TopologyManager feature gate to be enabled.
 	TopologyManagerPolicy string
@@ -382,6 +385,12 @@ type KubeletConfiguration struct {
 	// Defaults to 10 seconds, requires GracefulNodeShutdown feature gate to be enabled.
 	// For example, if ShutdownGracePeriod=30s, and ShutdownGracePeriodCriticalPods=10s, during a node shutdown the first 20 seconds would be reserved for gracefully terminating normal pods, and the last 10 seconds would be reserved for terminating critical pods.
 	ShutdownGracePeriodCriticalPods metav1.Duration
+	// A comma-separated list of bracket-enclosed configurations for the memory manager.
+	// Each configuration describes pre-reserved memory for a particular memory type on a specific NUMA node.
+	// The Memory Manager validates that the total amount of pre-reserved memory is identical
+	// to the memory reserved by the Node Allocatable feature.
+	// The format is {numa-node=integer, type=string, limit=string}
+	// (e.g.
{numa-node=0, type=memory, limit=1Gi}, {numa-node=1, type=memory, limit=1Gi}) + ReservedMemory []map[string]string } // KubeletAuthorizationMode denotes the authorization mode for the kubelet diff --git a/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go b/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go index 09aae527ecce..d0865b25746c 100644 --- a/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go +++ b/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go @@ -274,6 +274,7 @@ func autoConvert_v1beta1_KubeletConfiguration_To_config_KubeletConfiguration(in out.CgroupDriver = in.CgroupDriver out.CPUManagerPolicy = in.CPUManagerPolicy out.CPUManagerReconcilePeriod = in.CPUManagerReconcilePeriod + out.MemoryManagerPolicy = in.MemoryManagerPolicy out.TopologyManagerPolicy = in.TopologyManagerPolicy out.TopologyManagerScope = in.TopologyManagerScope out.QOSReserved = *(*map[string]string)(unsafe.Pointer(&in.QOSReserved)) @@ -352,6 +353,7 @@ func autoConvert_v1beta1_KubeletConfiguration_To_config_KubeletConfiguration(in } out.ShutdownGracePeriod = in.ShutdownGracePeriod out.ShutdownGracePeriodCriticalPods = in.ShutdownGracePeriodCriticalPods + out.ReservedMemory = *(*[]map[string]string)(unsafe.Pointer(&in.ReservedMemory)) return nil } @@ -429,6 +431,7 @@ func autoConvert_config_KubeletConfiguration_To_v1beta1_KubeletConfiguration(in out.CgroupDriver = in.CgroupDriver out.CPUManagerPolicy = in.CPUManagerPolicy out.CPUManagerReconcilePeriod = in.CPUManagerReconcilePeriod + out.MemoryManagerPolicy = in.MemoryManagerPolicy out.TopologyManagerPolicy = in.TopologyManagerPolicy out.TopologyManagerScope = in.TopologyManagerScope out.QOSReserved = *(*map[string]string)(unsafe.Pointer(&in.QOSReserved)) @@ -505,6 +508,7 @@ func autoConvert_config_KubeletConfiguration_To_v1beta1_KubeletConfiguration(in } out.ShutdownGracePeriod = in.ShutdownGracePeriod out.ShutdownGracePeriodCriticalPods = in.ShutdownGracePeriodCriticalPods + out.ReservedMemory = *(*[]map[string]string)(unsafe.Pointer(&in.ReservedMemory)) return nil } diff --git a/pkg/kubelet/apis/config/zz_generated.deepcopy.go b/pkg/kubelet/apis/config/zz_generated.deepcopy.go index e458d832294f..5dc85843b6ef 100644 --- a/pkg/kubelet/apis/config/zz_generated.deepcopy.go +++ b/pkg/kubelet/apis/config/zz_generated.deepcopy.go @@ -273,6 +273,19 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) { out.Logging = in.Logging out.ShutdownGracePeriod = in.ShutdownGracePeriod out.ShutdownGracePeriodCriticalPods = in.ShutdownGracePeriodCriticalPods + if in.ReservedMemory != nil { + in, out := &in.ReservedMemory, &out.ReservedMemory + *out = make([]map[string]string, len(*in)) + for i := range *in { + if (*in)[i] != nil { + in, out := &(*in)[i], &(*out)[i] + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + } + } return } diff --git a/staging/src/k8s.io/kubelet/config/v1beta1/types.go b/staging/src/k8s.io/kubelet/config/v1beta1/types.go index aa37d6df2809..c75d129f2356 100644 --- a/staging/src/k8s.io/kubelet/config/v1beta1/types.go +++ b/staging/src/k8s.io/kubelet/config/v1beta1/types.go @@ -73,6 +73,13 @@ const ( // PodTopologyManagerScope represents that // topology policy is applied on a per-pod basis. 
 	PodTopologyManagerScope = "pod"
+	// NoneMemoryManagerPolicy is the none memory manager policy; under the none
+	// policy the memory manager does not pin the container memory of guaranteed pods
+	NoneMemoryManagerPolicy = "none"
+	// StaticMemoryManagerPolicy is the static memory manager policy; under the static
+	// policy the memory manager tries to pin the container memory of guaranteed pods
+	// to the smallest possible subset of NUMA nodes
+	StaticMemoryManagerPolicy = "static"
 )
 
 // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
@@ -433,6 +440,13 @@ type KubeletConfiguration struct {
 	// Default: "10s"
 	// +optional
 	CPUManagerReconcilePeriod metav1.Duration `json:"cpuManagerReconcilePeriod,omitempty"`
+	// MemoryManagerPolicy is the name of the policy used by the memory manager.
+	// Requires the MemoryManager feature gate to be enabled.
+	// Dynamic Kubelet Config (beta): This field should not be updated without a full node
+	// reboot. It is safest to keep this value the same as the local config.
+	// Default: "none"
+	// +optional
+	MemoryManagerPolicy string `json:"memoryManagerPolicy,omitempty"`
 	// TopologyManagerPolicy is the name of the policy to use.
 	// Policies other than "none" require the TopologyManager feature gate to be enabled.
 	// Dynamic Kubelet Config (beta): This field should not be updated without a full node
@@ -824,6 +838,14 @@ type KubeletConfiguration struct {
 	// Default: "10s"
 	// +optional
 	ShutdownGracePeriodCriticalPods metav1.Duration `json:"shutdownGracePeriodCriticalPods,omitempty"`
+	// A comma-separated list of bracket-enclosed configurations for the memory manager.
+	// Each configuration describes pre-reserved memory for a particular memory type on a specific NUMA node.
+	// The Memory Manager validates that the total amount of pre-reserved memory is identical
+	// to the memory reserved by the Node Allocatable feature.
+	// The format is {numa-node=integer, type=string, limit=string}
+	// (e.g. {numa-node=0, type=memory, limit=1Gi}, {numa-node=1, type=memory, limit=1Gi})
+	// Default: nil
+	// +optional
+	ReservedMemory []map[string]string `json:"reservedMemory,omitempty"`
 }
 
 type KubeletAuthorizationMode string

diff --git a/staging/src/k8s.io/kubelet/config/v1beta1/zz_generated.deepcopy.go b/staging/src/k8s.io/kubelet/config/v1beta1/zz_generated.deepcopy.go
index a6ad075c9ad7..a6c7f56023ef 100644
--- a/staging/src/k8s.io/kubelet/config/v1beta1/zz_generated.deepcopy.go
+++ b/staging/src/k8s.io/kubelet/config/v1beta1/zz_generated.deepcopy.go
@@ -303,6 +303,19 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) {
 	}
 	out.ShutdownGracePeriod = in.ShutdownGracePeriod
 	out.ShutdownGracePeriodCriticalPods = in.ShutdownGracePeriodCriticalPods
+	if in.ReservedMemory != nil {
+		in, out := &in.ReservedMemory, &out.ReservedMemory
+		*out = make([]map[string]string, len(*in))
+		for i := range *in {
+			if (*in)[i] != nil {
+				in, out := &(*in)[i], &(*out)[i]
+				*out = make(map[string]string, len(*in))
+				for key, val := range *in {
+					(*out)[key] = val
+				}
+			}
+		}
+	}
 	return
 }

From 9ae499ae46fa1c08d1da9a4cd7e7dff01012aa21 Mon Sep 17 00:00:00 2001
From: Artyom Lukianov
Date: Thu, 8 Oct 2020 18:47:57 +0300
Subject: [PATCH 08/31] memory manager: pass memory manager flags to the
 container manager

Pass memory manager flags to the container manager and call all relevant
memory manager methods under the container manager.
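For illustration, a node would opt in to the new code path with a kubelet
configuration along these lines (the field names come from the previous
commit; the values are only an example):

  featureGates:
    MemoryManager: true
  memoryManagerPolicy: static
  reservedMemory:
    - numa-node: "0"
      type: memory
      limit: 1Gi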
Signed-off-by: Byonggon Chun --- pkg/kubelet/cm/container_manager.go | 19 ++++---- pkg/kubelet/cm/container_manager_linux.go | 47 ++++++++++++++++++- .../cm/internal_container_lifecycle.go | 9 ++++ .../cm/memorymanager/memory_manager.go | 19 ++++++-- 4 files changed, 81 insertions(+), 13 deletions(-) diff --git a/pkg/kubelet/cm/container_manager.go b/pkg/kubelet/cm/container_manager.go index ea81d6163a75..fcebd79fd758 100644 --- a/pkg/kubelet/cm/container_manager.go +++ b/pkg/kubelet/cm/container_manager.go @@ -19,6 +19,7 @@ package cm import ( "time" + "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/util/sets" // TODO: Migrate kubelet to either use its own internal objects or client library. v1 "k8s.io/api/core/v1" @@ -131,14 +132,16 @@ type NodeConfig struct { KubeletRootDir string ProtectKernelDefaults bool NodeAllocatableConfig - QOSReserved map[v1.ResourceName]int64 - ExperimentalCPUManagerPolicy string - ExperimentalTopologyManagerScope string - ExperimentalCPUManagerReconcilePeriod time.Duration - ExperimentalPodPidsLimit int64 - EnforceCPULimits bool - CPUCFSQuotaPeriod time.Duration - ExperimentalTopologyManagerPolicy string + QOSReserved map[v1.ResourceName]int64 + ExperimentalCPUManagerPolicy string + ExperimentalTopologyManagerScope string + ExperimentalCPUManagerReconcilePeriod time.Duration + ExperimentalMemoryManagerPolicy string + ExperimentalMemoryManagerReservedMemory map[int]map[v1.ResourceName]resource.Quantity + ExperimentalPodPidsLimit int64 + EnforceCPULimits bool + CPUCFSQuotaPeriod time.Duration + ExperimentalTopologyManagerPolicy string } type NodeAllocatableConfig struct { diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go index 8db1638d1793..a514ce68c431 100644 --- a/pkg/kubelet/cm/container_manager_linux.go +++ b/pkg/kubelet/cm/container_manager_linux.go @@ -53,6 +53,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/containermap" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager" "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager" + "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" cmutil "k8s.io/kubernetes/pkg/kubelet/cm/util" "k8s.io/kubernetes/pkg/kubelet/config" @@ -138,6 +139,8 @@ type containerManagerImpl struct { deviceManager devicemanager.Manager // Interface for CPU affinity management. cpuManager cpumanager.Manager + // Interface for memory affinity management. 
+ memoryManager memorymanager.Manager // Interface for Topology resource co-ordination topologyManager topologymanager.Manager } @@ -341,6 +344,22 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I cm.topologyManager.AddHintProvider(cm.cpuManager) } + if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.MemoryManager) { + cm.memoryManager, err = memorymanager.NewManager( + nodeConfig.ExperimentalMemoryManagerPolicy, + machineInfo, + cm.GetNodeAllocatableReservation(), + nodeConfig.ExperimentalMemoryManagerReservedMemory, + nodeConfig.KubeletRootDir, + cm.topologyManager, + ) + if err != nil { + klog.Errorf("failed to initialize memory manager: %v", err) + return nil, err + } + cm.topologyManager.AddHintProvider(cm.memoryManager) + } + return cm, nil } @@ -364,7 +383,7 @@ func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager { } func (cm *containerManagerImpl) InternalContainerLifecycle() InternalContainerLifecycle { - return &internalContainerLifecycleImpl{cm.cpuManager, cm.topologyManager} + return &internalContainerLifecycleImpl{cm.cpuManager, cm.memoryManager, cm.topologyManager} } // Create a cgroup container manager. @@ -606,6 +625,18 @@ func (cm *containerManagerImpl) Start(node *v1.Node, } } + // Initialize memory manager + if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.MemoryManager) { + containerMap, err := buildContainerMapFromRuntime(runtimeService) + if err != nil { + return fmt.Errorf("failed to build map of initial containers from runtime: %v", err) + } + err = cm.memoryManager.Start(memorymanager.ActivePodsFunc(activePods), sourcesReady, podStatusProvider, runtimeService, containerMap) + if err != nil { + return fmt.Errorf("start memory manager error: %v", err) + } + } + // cache the node Info including resource capacity and // allocatable of the node cm.nodeInfo = node @@ -706,11 +737,12 @@ func (cm *containerManagerImpl) GetAllocateResourcesPodAdmitHandler() lifecycle. // work as we add more and more hint providers that the TopologyManager // needs to call Allocate() on (that may not be directly intstantiated // inside this component). - return &resourceAllocator{cm.cpuManager, cm.deviceManager} + return &resourceAllocator{cm.cpuManager, cm.memoryManager, cm.deviceManager} } type resourceAllocator struct { cpuManager cpumanager.Manager + memoryManager memorymanager.Manager deviceManager devicemanager.Manager } @@ -737,6 +769,17 @@ func (m *resourceAllocator) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle } } } + + if m.memoryManager != nil { + err = m.memoryManager.Allocate(pod, &container) + if err != nil { + return lifecycle.PodAdmitResult{ + Message: fmt.Sprintf("Allocate failed due to %v, which is unexpected", err), + Reason: "UnexpectedAdmissionError", + Admit: false, + } + } + } } return lifecycle.PodAdmitResult{Admit: true} diff --git a/pkg/kubelet/cm/internal_container_lifecycle.go b/pkg/kubelet/cm/internal_container_lifecycle.go index 0635ea0ed4f4..0d3a3357b068 100644 --- a/pkg/kubelet/cm/internal_container_lifecycle.go +++ b/pkg/kubelet/cm/internal_container_lifecycle.go @@ -22,6 +22,7 @@ import ( runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" kubefeatures "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager" + "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" ) @@ -35,6 +36,7 @@ type InternalContainerLifecycle interface { // Implements InternalContainerLifecycle interface. 
type internalContainerLifecycleImpl struct { cpuManager cpumanager.Manager + memoryManager memorymanager.Manager topologyManager topologymanager.Manager } @@ -43,6 +45,13 @@ func (i *internalContainerLifecycleImpl) PreStartContainer(pod *v1.Pod, containe i.cpuManager.AddContainer(pod, container, containerID) } + if i.memoryManager != nil { + err := i.memoryManager.AddContainer(pod, container, containerID) + if err != nil { + return err + } + } + if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.TopologyManager) { err := i.topologyManager.AddContainer(pod, containerID) if err != nil { diff --git a/pkg/kubelet/cm/memorymanager/memory_manager.go b/pkg/kubelet/cm/memorymanager/memory_manager.go index b242f483e22b..b298ca384d9a 100644 --- a/pkg/kubelet/cm/memorymanager/memory_manager.go +++ b/pkg/kubelet/cm/memorymanager/memory_manager.go @@ -25,6 +25,7 @@ import ( cadvisorapi "github.com/google/cadvisor/info/v1" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/kubelet/cm/containermap" @@ -113,7 +114,7 @@ type manager struct { var _ Manager = &manager{} // NewManager returns new instance of the memory manager -func NewManager(policyName string, machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store) (Manager, error) { +func NewManager(policyName string, machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, reservedMemory map[int]map[v1.ResourceName]resource.Quantity, stateFileDirectory string, affinity topologymanager.Store) (Manager, error) { var policy Policy switch policyType(policyName) { @@ -122,10 +123,11 @@ func NewManager(policyName string, machineInfo *cadvisorapi.MachineInfo, nodeAll policy = NewPolicyNone() case policyTypeStatic: - reserved, err := getReservedMemory(machineInfo, nodeAllocatableReservation) + reserved, err := getReservedMemory(machineInfo, nodeAllocatableReservation, reservedMemory) if err != nil { return nil, err } + policy, err = NewPolicyStatic(machineInfo, reserved, affinity) if err != nil { return nil, err @@ -307,10 +309,21 @@ func (m *manager) policyRemoveContainerByRef(podUID string, containerName string return err } -func getReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList) (systemReservedMemory, error) { +func validateReservedMemory(nodeAllocatableReservation v1.ResourceList, reservedMemory map[int]map[v1.ResourceName]resource.Quantity) error { + // TODO: this will check equality of total reserved memory by node allocatable feature and total pre-reserved memory + + return nil +} + +func getReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, reservedMemory map[int]map[v1.ResourceName]resource.Quantity) (systemReservedMemory, error) { // TODO: we should add new kubelet parameter, and to get reserved memory per NUMA node from it // currently we use kube-reserved + system-reserved + eviction reserve for each NUMA node, that creates memory over-consumption // and no reservation for huge pages + + if err := validateReservedMemory(nodeAllocatableReservation, reservedMemory); err != nil { + return nil, err + } + reserved := systemReservedMemory{} for _, node := range machineInfo.Topology { memory := nodeAllocatableReservation[v1.ResourceMemory] From 711e85af24d68af379f28a8030d3d5156236a5d8 Mon Sep 17 00:00:00 2001 From: Krzysztof Wiatrzyk Date: Wed, 22 
Apr 2020 15:51:44 +0200
Subject: [PATCH 09/31] memory manager: add tests for the
 parseReservedMemoryConfig() function in server.go

---
 cmd/kubelet/app/server_test.go | 57 ++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/cmd/kubelet/app/server_test.go b/cmd/kubelet/app/server_test.go
index 1db214ab954b..c80d1fd4d272 100644
--- a/cmd/kubelet/app/server_test.go
+++ b/cmd/kubelet/app/server_test.go
@@ -61,3 +61,60 @@ func TestValueOfAllocatableResources(t *testing.T) {
 		}
 	}
 }
+
+func TestValueOfReservedMemoryConfig(t *testing.T) {
+	testCases := []struct {
+		config        []map[string]string
+		errorExpected bool
+		name          string
+	}{
+		{
+			config:        []map[string]string{{"numa-node": "0", "type": "memory", "limit": "2Gi"}},
+			errorExpected: false,
+			name:          "Valid resource quantity",
+		},
+		{
+			config:        []map[string]string{{"numa-node": "0", "type": "memory", "limit": "2000m"}, {"numa-node": "1", "type": "memory", "limit": "1Gi"}},
+			errorExpected: false,
+			name:          "Valid resource quantities for two NUMA nodes",
+		},
+		{
+			config:        []map[string]string{{"type": "memory", "limit": "2Gi"}},
+			errorExpected: true,
+			name:          "Missing key",
+		},
+		{
+			config:        []map[string]string{{"numa-node": "one", "type": "memory", "limit": "2Gi"}},
+			errorExpected: true,
+			name:          "Wrong 'numa-node' value",
+		},
+		{
+			config:        []map[string]string{{"numa-node": "0", "type": "not-memory", "limit": "2Gi"}},
+			errorExpected: true,
+			name:          "Wrong 'type' value",
+		},
+		{
+			config:        []map[string]string{{"numa-node": "0", "type": "memory", "limit": "2Gigs"}},
+			errorExpected: true,
+			name:          "Wrong 'limit' value",
+		},
+		{
+			config:        []map[string]string{{"numa-node": "-1", "type": "memory", "limit": "2Gi"}},
+			errorExpected: true,
+			name:          "Invalid 'numa-node' number",
+		},
+	}
+
+	for _, test := range testCases {
+		_, err := parseReservedMemoryConfig(test.config)
+		if test.errorExpected {
+			if err == nil {
+				t.Errorf("%s: error expected", test.name)
+			}
+		} else {
+			if err != nil {
+				t.Errorf("%s: unexpected error: %v", test.name, err)
+			}
+		}
+	}
+}

From 4a6410291804306e6409da1736acb92edd24c3a2 Mon Sep 17 00:00:00 2001
From: Cezary Zukowski
Date: Thu, 23 Apr 2020 17:56:47 +0200
Subject: [PATCH 10/31] memory manager: validate reserved-memory against Node
 Allocatable

Reserved memory of all kinds (and over all NUMA nodes) must be equal to the
values determined by the Node Allocatable feature.
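A worked example (the numbers are only an illustration): with
--kube-reserved=memory=500Mi, --system-reserved=memory=500Mi and a hard
eviction threshold of memory.available<100Mi, the Node Allocatable feature
reserves 1100Mi of regular memory in total, so validation succeeds only if
the --reserved-memory entries of type "memory" also sum to exactly 1100Mi
across all NUMA nodes, e.g.:

  --reserved-memory "{numa-node=0, type=memory, limit=600Mi}, {numa-node=1, type=memory, limit=500Mi}"

The same per-type comparison is applied to every huge pages resource.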
Signed-off-by: Cezary Zukowski --- pkg/features/kube_features.go | 1 + .../cm/memorymanager/memory_manager.go | 103 +++++++--- .../cm/memorymanager/memory_manager_test.go | 181 ++++++++++++++++++ ...acket_separated_slice_map_string_string.go | 17 +- ..._separated_slice_map_string_string_test.go | 10 +- 5 files changed, 268 insertions(+), 44 deletions(-) create mode 100644 pkg/kubelet/cm/memorymanager/memory_manager_test.go diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index f1a47fbdb8c1..0801411b39c7 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -703,6 +703,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS ExpandInUsePersistentVolumes: {Default: true, PreRelease: featuregate.Beta}, ExpandCSIVolumes: {Default: true, PreRelease: featuregate.Beta}, CPUManager: {Default: true, PreRelease: featuregate.Beta}, + MemoryManager: {Default: false, PreRelease: featuregate.Alpha}, CPUCFSQuotaPeriod: {Default: false, PreRelease: featuregate.Alpha}, TopologyManager: {Default: true, PreRelease: featuregate.Beta}, ServiceNodeExclusion: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.22 diff --git a/pkg/kubelet/cm/memorymanager/memory_manager.go b/pkg/kubelet/cm/memorymanager/memory_manager.go index b298ca384d9a..59d19e618c39 100644 --- a/pkg/kubelet/cm/memorymanager/memory_manager.go +++ b/pkg/kubelet/cm/memorymanager/memory_manager.go @@ -28,6 +28,7 @@ import ( "k8s.io/apimachinery/pkg/api/resource" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" "k8s.io/klog/v2" + corev1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" "k8s.io/kubernetes/pkg/kubelet/cm/containermap" "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" @@ -101,8 +102,6 @@ type manager struct { // for all containers a pod containerMap containermap.ContainerMap - nodeAllocatableReservation v1.ResourceList - // sourcesReady provides the readiness of kubelet configuration sources such as apiserver update readiness. // We use it to determine when we can purge inactive pods from checkpointed state. 
sourcesReady config.SourcesReady @@ -123,12 +122,12 @@ func NewManager(policyName string, machineInfo *cadvisorapi.MachineInfo, nodeAll policy = NewPolicyNone() case policyTypeStatic: - reserved, err := getReservedMemory(machineInfo, nodeAllocatableReservation, reservedMemory) + systemReserved, err := getSystemReservedMemory(machineInfo, nodeAllocatableReservation, reservedMemory) if err != nil { return nil, err } - policy, err = NewPolicyStatic(machineInfo, reserved, affinity) + policy, err = NewPolicyStatic(machineInfo, systemReserved, affinity) if err != nil { return nil, err } @@ -138,9 +137,8 @@ func NewManager(policyName string, machineInfo *cadvisorapi.MachineInfo, nodeAll } manager := &manager{ - policy: policy, - nodeAllocatableReservation: nodeAllocatableReservation, - stateFileDirectory: stateFileDirectory, + policy: policy, + stateFileDirectory: stateFileDirectory, } manager.sourcesReady = &sourcesReadyStub{} return manager, nil @@ -309,35 +307,86 @@ func (m *manager) policyRemoveContainerByRef(podUID string, containerName string return err } -func validateReservedMemory(nodeAllocatableReservation v1.ResourceList, reservedMemory map[int]map[v1.ResourceName]resource.Quantity) error { - // TODO: this will check equality of total reserved memory by node allocatable feature and total pre-reserved memory +func getTotalMemoryTypeReserved(preReservedMemory map[int]map[v1.ResourceName]resource.Quantity) map[v1.ResourceName]resource.Quantity { + totalMemoryType := map[v1.ResourceName]resource.Quantity{} - return nil + for _, node := range preReservedMemory { + for memType, memVal := range node { + if totalMem, exists := totalMemoryType[memType]; exists { + memVal.Add(totalMem) + } + totalMemoryType[memType] = memVal + } + } + + return totalMemoryType } -func getReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, reservedMemory map[int]map[v1.ResourceName]resource.Quantity) (systemReservedMemory, error) { - // TODO: we should add new kubelet parameter, and to get reserved memory per NUMA node from it - // currently we use kube-reserved + system-reserved + eviction reserve for each NUMA node, that creates memory over-consumption - // and no reservation for huge pages +func validateReservedMemory(nodeAllocatableReservation v1.ResourceList, reservedMemory map[int]map[v1.ResourceName]resource.Quantity) error { + totalMemoryType := getTotalMemoryTypeReserved(reservedMemory) - if err := validateReservedMemory(nodeAllocatableReservation, reservedMemory); err != nil { - return nil, err + commonMemoryTypeSet := make(map[v1.ResourceName]bool) + for resourceType := range totalMemoryType { + if !(corev1helper.IsHugePageResourceName(resourceType) || resourceType == v1.ResourceMemory) { + continue + } + commonMemoryTypeSet[resourceType] = true + } + for resourceType := range nodeAllocatableReservation { + if !(corev1helper.IsHugePageResourceName(resourceType) || resourceType == v1.ResourceMemory) { + continue + } + commonMemoryTypeSet[resourceType] = true } - reserved := systemReservedMemory{} - for _, node := range machineInfo.Topology { - memory := nodeAllocatableReservation[v1.ResourceMemory] - if memory.IsZero() { - break + for resourceType := range commonMemoryTypeSet { + nodeAllocatableMemory := resource.NewQuantity(0, resource.DecimalSI) + if memValue, set := nodeAllocatableReservation[resourceType]; set { + nodeAllocatableMemory.Add(memValue) } - value, succeeded := memory.AsInt64() - if !succeeded { - return nil, fmt.Errorf("failed to represent 
reserved memory as int64")
+
+		reservedValue := resource.NewQuantity(0, resource.DecimalSI)
+		if memValue, set := totalMemoryType[resourceType]; set {
+			reservedValue.Add(memValue)
+		}
+
+		if !(*nodeAllocatableMemory).Equal(*reservedValue) {
+			return fmt.Errorf("the total amount of memory of type \"%s\" is not equal to the value determined by the Node Allocatable feature", resourceType)
 		}
-
-		reserved[node.Id] = map[v1.ResourceName]uint64{
-			v1.ResourceMemory: uint64(value),
+	}
+
+	return nil
+}
+
+func convertReserved(machineInfo *cadvisorapi.MachineInfo, reservedMemory map[int]map[v1.ResourceName]resource.Quantity) (systemReservedMemory, error) {
+	preReservedMemoryConverted := make(map[int]map[v1.ResourceName]uint64)
+	for _, node := range machineInfo.Topology {
+		preReservedMemoryConverted[node.Id] = make(map[v1.ResourceName]uint64)
+	}
+
+	for numaIndex := range reservedMemory {
+		for memoryType := range reservedMemory[numaIndex] {
+			tmp := reservedMemory[numaIndex][memoryType]
+			if val, success := tmp.AsInt64(); success {
+				preReservedMemoryConverted[numaIndex][memoryType] = uint64(val)
+			} else {
+				return nil, fmt.Errorf("could not convert a variable of type Quantity to int64")
+			}
 		}
 	}
-	return reserved, nil
+
+	return preReservedMemoryConverted, nil
+}
+
+func getSystemReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, preReservedMemory map[int]map[v1.ResourceName]resource.Quantity) (systemReservedMemory, error) {
+	if err := validateReservedMemory(nodeAllocatableReservation, preReservedMemory); err != nil {
+		return nil, err
+	}
+
+	reservedMemoryConverted, err := convertReserved(machineInfo, preReservedMemory)
+	if err != nil {
+		return nil, err
+	}
+
+	return reservedMemoryConverted, nil
 }

diff --git a/pkg/kubelet/cm/memorymanager/memory_manager_test.go b/pkg/kubelet/cm/memorymanager/memory_manager_test.go
new file mode 100644
index 000000000000..ae6ec42e6ad0
--- /dev/null
+++ b/pkg/kubelet/cm/memorymanager/memory_manager_test.go
@@ -0,0 +1,181 @@
+package memorymanager
+
+import (
+	"fmt"
+	"reflect"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	info "github.com/google/cadvisor/info/v1"
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+)
+
+const (
+	hugepages2M = "hugepages-2Mi"
+	hugepages1G = "hugepages-1Gi"
+)
+
+type nodeResources map[v1.ResourceName]resource.Quantity
+
+// TestValidatePreReservedMemory covers validateReservedMemory
+func TestValidatePreReservedMemory(t *testing.T) {
+	const msgNotEqual = "the total amount of memory of type \"%s\" is not equal to the value determined by the Node Allocatable feature"
+	testCases := []struct {
+		description                string
+		nodeAllocatableReservation v1.ResourceList
+		preReservedMemory          map[int]map[v1.ResourceName]resource.Quantity
+		expectedError              string
+	}{
+		{
+			"Node Allocatable not set, pre-reserved not set",
+			v1.ResourceList{},
+			map[int]map[v1.ResourceName]resource.Quantity{},
+			"",
+		},
+		{
+			"Node Allocatable set to zero, pre-reserved set to zero",
+			v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(0, resource.DecimalSI)},
+			map[int]map[v1.ResourceName]resource.Quantity{
+				0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(0, resource.DecimalSI)},
+			},
+			"",
+		},
+		{
+			"Node Allocatable not set (equal zero), pre-reserved set",
+			v1.ResourceList{},
+			map[int]map[v1.ResourceName]resource.Quantity{
+				0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI)},
+			},
+			fmt.Sprintf(msgNotEqual, v1.ResourceMemory),
+		},
+		{
+			"Node Allocatable set, pre-reserved not set",
+			v1.ResourceList{hugepages2M: *resource.NewQuantity(5, resource.DecimalSI)},
+			map[int]map[v1.ResourceName]resource.Quantity{},
+			fmt.Sprintf(msgNotEqual, hugepages2M),
+		},
+		{
+			"Pre-reserved not equal to Node Allocatable",
+			v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI)},
+			map[int]map[v1.ResourceName]resource.Quantity{
+				0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI)},
+			},
+			fmt.Sprintf(msgNotEqual, v1.ResourceMemory),
+		},
+		{
+			"Pre-reserved total equal to Node Allocatable",
+			v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(17, resource.DecimalSI),
+				hugepages2M: *resource.NewQuantity(77, resource.DecimalSI),
+				hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)},
+			map[int]map[v1.ResourceName]resource.Quantity{
+				0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI),
+					hugepages2M: *resource.NewQuantity(70, resource.DecimalSI),
+					hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)},
+				1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI),
+					hugepages2M: *resource.NewQuantity(7, resource.DecimalSI)},
+			},
+			"",
+		},
+		{
+			"Pre-reserved total hugepages-2Mi not equal to Node Allocatable",
+			v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(17, resource.DecimalSI),
+				hugepages2M: *resource.NewQuantity(14, resource.DecimalSI),
+				hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)},
+			map[int]map[v1.ResourceName]resource.Quantity{
+				0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI),
+					hugepages2M: *resource.NewQuantity(70, resource.DecimalSI),
+					hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)},
+				1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI),
+					hugepages2M: *resource.NewQuantity(7, resource.DecimalSI)},
+			},
+			fmt.Sprintf(msgNotEqual, hugepages2M),
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.description, func(t *testing.T) {
+			err := validateReservedMemory(tc.nodeAllocatableReservation, tc.preReservedMemory)
+			if strings.TrimSpace(tc.expectedError) != "" {
+				assert.Error(t, err)
+				assert.Equal(t, err.Error(), tc.expectedError)
+			} else {
+				// cases with an empty expectedError must pass validation
+				assert.NoError(t, err)
+			}
+		})
+	}
+}
+
+func TestConvertPreReserved(t *testing.T) {
+	machineInfo := info.MachineInfo{
+		Topology: []info.Node{
+			info.Node{Id: 0},
+			info.Node{Id: 1},
+		},
+	}
+
+	testCases := []struct {
+		description      string
+		reserved         map[int]map[v1.ResourceName]resource.Quantity
+		reservedExpected systemReservedMemory
+		expectedError    string
+	}{
+		{
+			"Empty",
+			map[int]map[v1.ResourceName]resource.Quantity{},
+			systemReservedMemory{
+				0: map[v1.ResourceName]uint64{},
+				1: map[v1.ResourceName]uint64{},
+			},
+			"",
+		},
+		{
+			"Single NUMA node is pre-reserved",
+			map[int]map[v1.ResourceName]resource.Quantity{
+				0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI),
+					hugepages2M: *resource.NewQuantity(70, resource.DecimalSI),
+					hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)},
+			},
+			systemReservedMemory{
+				0: map[v1.ResourceName]uint64{
+					v1.ResourceMemory: 12,
+					hugepages2M:       70,
+					hugepages1G:       13,
+				},
+				1: map[v1.ResourceName]uint64{},
+			},
+			"",
+		},
+		{
+			"Both NUMA nodes are pre-reserved",
+			map[int]map[v1.ResourceName]resource.Quantity{
+				0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI),
+					hugepages2M: *resource.NewQuantity(70, resource.DecimalSI),
+					hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)},
+				1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI),
+					hugepages2M: *resource.NewQuantity(7, resource.DecimalSI)},
+			},
+			systemReservedMemory{
+				0: map[v1.ResourceName]uint64{
+					v1.ResourceMemory: 12,
+					hugepages2M:       70,
+					hugepages1G:       13,
+				},
+				1: map[v1.ResourceName]uint64{
+					v1.ResourceMemory: 5,
+					hugepages2M:       7,
+				},
+			},
+			"",
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.description, func(t *testing.T) {
+			reserved, _ := convertReserved(&machineInfo, tc.reserved)
+			if !reflect.DeepEqual(reserved, tc.reservedExpected) {
+				t.Errorf("got %v, expected %v", reserved, tc.reservedExpected)
+			}
+		})
+	}
+}

diff --git a/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string.go b/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string.go
index c21780d01b28..e3a99c872bdd 100644
--- a/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string.go
+++ b/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string.go
@@ -23,10 +23,10 @@ import (
 )
 
 // BracketSeparatedSliceMapStringString can be set from the command line with the format `--flag {key=value, ...}, {...}`.
-// Multiple comma-separated key-value pairs in a braket(`{}`) in a single invocation are supported. For example: `--flag {key=value, key=value, ...}`.
-// Multiple braket-separated list of key-value pairs in a single invocation are supported. For example: `--flag {key=value, key=value}, {key=value, key=value}`.
+// Multiple comma-separated key-value pairs in brackets (`{}`) in a single invocation are supported. For example: `--flag {key=value, key=value, ...}`.
+// Multiple bracket-separated lists of key-value pairs in a single invocation are supported. For example: `--flag {key=value, key=value}, {key=value, key=value}`.
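+// The kubelet --reserved-memory flag is one consumer of this format, e.g.
+// `{numa-node=0, type=memory, limit=1Gi}, {numa-node=1, type=memory, limit=1Gi}`.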
type BracketSeparatedSliceMapStringString struct { - Value *[]map[string]string + Value *[]map[string]string initialized bool // set to true after the first Set call } @@ -36,7 +36,6 @@ func NewBracketSeparatedSliceMapStringString(m *[]map[string]string) *BracketSep return &BracketSeparatedSliceMapStringString{Value: m} } - // Set implements github.com/spf13/pflag.Value func (m *BracketSeparatedSliceMapStringString) Set(value string) error { if m.Value == nil { @@ -49,13 +48,7 @@ func (m *BracketSeparatedSliceMapStringString) Set(value string) error { value = strings.TrimSpace(value) - // split here - //{numa-node=0,memory-type=memory,limit=1Gi},{numa-node=1,memory-type=memory,limit=1Gi},{numa-node=1,memory-type=memory,limit=1Gi} -// for _, split := range strings.Split(value, "{") { -// split = strings.TrimRight(split, ",") -// split = strings.TrimRight(split, "}") for _, split := range strings.Split(value, ",{") { - //split = strings.TrimRight(split, ",") split = strings.TrimLeft(split, "{") split = strings.TrimRight(split, "}") @@ -66,7 +59,7 @@ func (m *BracketSeparatedSliceMapStringString) Set(value string) error { // now we have "numa-node=1,memory-type=memory,limit=1Gi" tmpRawMap := make(map[string]string) - tmpMap:= NewMapStringString(&tmpRawMap) + tmpMap := NewMapStringString(&tmpRawMap) if err := tmpMap.Set(split); err != nil { return fmt.Errorf("could not parse String: (%s): %v", value, err) @@ -100,7 +93,7 @@ func (m *BracketSeparatedSliceMapStringString) String() string { } if len(tmpPairs) != 0 { - slices = append(slices, "{" + strings.Join(tmpPairs, ",") + "}") + slices = append(slices, "{"+strings.Join(tmpPairs, ",")+"}") } } sort.Strings(slices) diff --git a/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string_test.go b/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string_test.go index caea52c87618..84049d1ebc2b 100644 --- a/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string_test.go +++ b/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string_test.go @@ -28,7 +28,7 @@ func TestStringBracketSeparatedSliceMapStringString(t *testing.T) { m *BracketSeparatedSliceMapStringString expect string }{ - {"nill", NewBracketSeparatedSliceMapStringString(&nilSliceMap), ""}, + {"nil", NewBracketSeparatedSliceMapStringString(&nilSliceMap), ""}, {"empty", NewBracketSeparatedSliceMapStringString(&[]map[string]string{}), ""}, {"one key", NewBracketSeparatedSliceMapStringString(&[]map[string]string{{"a": "string"}}), "{a=string}"}, {"two keys", NewBracketSeparatedSliceMapStringString(&[]map[string]string{{"a": "string", "b": "string"}}), "{a=string,b=string}"}, @@ -73,13 +73,13 @@ func TestSetBracketSeparatedSliceMapStringString(t *testing.T) { initialized: true, Value: &[]map[string]string{}, }, ""}, - {"empty braket", []string{"{}"}, + {"empty bracket", []string{"{}"}, NewBracketSeparatedSliceMapStringString(&nilMap), &BracketSeparatedSliceMapStringString{ initialized: true, Value: &[]map[string]string{}, }, ""}, - {"missing braket", []string{"a=string, b=string"}, + {"missing bracket", []string{"a=string, b=string"}, NewBracketSeparatedSliceMapStringString(&nilMap), &BracketSeparatedSliceMapStringString{ initialized: true, @@ -103,13 +103,13 @@ func TestSetBracketSeparatedSliceMapStringString(t *testing.T) { initialized: true, Value: &[]map[string]string{{"a": "string", "b": "string"}}, }, ""}, - {"two duplecated keys", []string{"{a=string,a=string}"}, + {"two 
duplicated keys", []string{"{a=string,a=string}"},
 	NewBracketSeparatedSliceMapStringString(&nilMap),
 	&BracketSeparatedSliceMapStringString{
 		initialized: true,
 		Value:       &[]map[string]string{{"a": "string"}},
 	}, ""},
-	{"two keys with space", []string{"{a = string, b = string}"},
+	{"two keys with spaces", []string{"{a = string, b = string}"},
 	NewBracketSeparatedSliceMapStringString(&nilMap),
 	&BracketSeparatedSliceMapStringString{
 		initialized: true,

From afb1ae3458fced0d2fb8c11831ab9d7203520db0 Mon Sep 17 00:00:00 2001
From: Artyom Lukianov
Date: Tue, 24 Mar 2020 17:18:41 +0200
Subject: [PATCH 11/31] memory manager: add fake memory manager

The fake memory manager is needed for unit testing.

Signed-off-by: Artyom Lukianov
---
 pkg/kubelet/cm/container_manager_stub.go      |  3 +-
 pkg/kubelet/cm/container_manager_windows.go   |  3 +-
 pkg/kubelet/cm/fake_container_manager.go      |  3 +-
 .../cm/memorymanager/fake_memory_manager.go   | 72 +++++++++++++++++++
 4 files changed, 78 insertions(+), 3 deletions(-)
 create mode 100644 pkg/kubelet/cm/memorymanager/fake_memory_manager.go

diff --git a/pkg/kubelet/cm/container_manager_stub.go b/pkg/kubelet/cm/container_manager_stub.go
index 65eac7aadcfa..ac4ceee2c56e 100644
--- a/pkg/kubelet/cm/container_manager_stub.go
+++ b/pkg/kubelet/cm/container_manager_stub.go
@@ -24,6 +24,7 @@ import (
 	internalapi "k8s.io/cri-api/pkg/apis"
 	podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
 	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
+	"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager"
 	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
 	"k8s.io/kubernetes/pkg/kubelet/config"
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
@@ -102,7 +103,7 @@ func (cm *containerManagerStub) UpdatePluginResources(*schedulerframework.NodeIn
 }
 
 func (cm *containerManagerStub) InternalContainerLifecycle() InternalContainerLifecycle {
-	return &internalContainerLifecycleImpl{cpumanager.NewFakeManager(), topologymanager.NewFakeManager()}
+	return &internalContainerLifecycleImpl{cpumanager.NewFakeManager(), memorymanager.NewFakeManager(), topologymanager.NewFakeManager()}
 }
 
 func (cm *containerManagerStub) GetPodCgroupRoot() string {

diff --git a/pkg/kubelet/cm/container_manager_windows.go b/pkg/kubelet/cm/container_manager_windows.go
index 072ea63fabeb..c3d07f270c30 100644
--- a/pkg/kubelet/cm/container_manager_windows.go
+++ b/pkg/kubelet/cm/container_manager_windows.go
@@ -37,6 +37,7 @@ import (
 	"k8s.io/kubernetes/pkg/kubelet/cadvisor"
 	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
 	"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
+	"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager"
 	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
 	"k8s.io/kubernetes/pkg/kubelet/config"
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
@@ -208,7 +209,7 @@ func (cm *containerManagerImpl) UpdatePluginResources(node *schedulerframework.N
 }
 
 func (cm *containerManagerImpl) InternalContainerLifecycle() InternalContainerLifecycle {
-	return &internalContainerLifecycleImpl{cpumanager.NewFakeManager(), topologymanager.NewFakeManager()}
+	return &internalContainerLifecycleImpl{cpumanager.NewFakeManager(), memorymanager.NewFakeManager(), topologymanager.NewFakeManager()}
 }
 
 func (cm *containerManagerImpl) GetPodCgroupRoot() string {

diff --git a/pkg/kubelet/cm/fake_container_manager.go b/pkg/kubelet/cm/fake_container_manager.go
index eb01b1ab3f2f..027fffc7e72a 100644
--- a/pkg/kubelet/cm/fake_container_manager.go
+++ b/pkg/kubelet/cm/fake_container_manager.go
@@ -25,6 +25,7 @@ import (
 	internalapi "k8s.io/cri-api/pkg/apis"
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager" + "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/config" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" @@ -156,7 +157,7 @@ func (cm *FakeContainerManager) InternalContainerLifecycle() InternalContainerLi cm.Lock() defer cm.Unlock() cm.CalledFunctions = append(cm.CalledFunctions, "InternalContainerLifecycle") - return &internalContainerLifecycleImpl{cpumanager.NewFakeManager(), topologymanager.NewFakeManager()} + return &internalContainerLifecycleImpl{cpumanager.NewFakeManager(), memorymanager.NewFakeManager(), topologymanager.NewFakeManager()} } func (cm *FakeContainerManager) GetPodCgroupRoot() string { diff --git a/pkg/kubelet/cm/memorymanager/fake_memory_manager.go b/pkg/kubelet/cm/memorymanager/fake_memory_manager.go new file mode 100644 index 000000000000..334476f702ee --- /dev/null +++ b/pkg/kubelet/cm/memorymanager/fake_memory_manager.go @@ -0,0 +1,72 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package memorymanager + +import ( + v1 "k8s.io/api/core/v1" + "k8s.io/klog" + "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/containermap" + "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" + "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" + "k8s.io/kubernetes/pkg/kubelet/config" + "k8s.io/kubernetes/pkg/kubelet/status" +) + +type fakeManager struct { + state state.State +} + +func (m *fakeManager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService, initialContainers containermap.ContainerMap) error { + klog.Info("[fake memorymanager] Start()") + return nil +} + +func (m *fakeManager) Policy() Policy { + klog.Info("[fake memorymanager] Policy()") + return NewPolicyNone() +} + +func (m *fakeManager) Allocate(pod *v1.Pod, container *v1.Container) error { + klog.Infof("[fake memorymanager] Allocate (pod: %s, container: %s", pod.Name, container.Name) + return nil +} + +func (m *fakeManager) AddContainer(pod *v1.Pod, container *v1.Container, containerID string) error { + klog.Infof("[fake memorymanager] AddContainer (pod: %s, container: %s, container id: %s)", pod.Name, container.Name, containerID) + return nil +} + +func (m *fakeManager) RemoveContainer(containerID string) error { + klog.Infof("[fake memorymanager] RemoveContainer (container id: %s)", containerID) + return nil +} + +func (m *fakeManager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { + klog.Infof("[fake memorymanager] Get Topology Hints") + return map[string][]topologymanager.TopologyHint{} +} + +func (m *fakeManager) State() state.Reader { + return m.state +} + +// NewFakeManager creates empty/fake memory manager +func NewFakeManager() Manager { + return &fakeManager{ + state: state.NewMemoryState(), + } +} From 371c918e6c058a6ece9f8bcef025746ffab65b0e 
From: Artyom Lukianov
Date: Wed, 1 Apr 2020 10:32:32 +0300
Subject: [PATCH 12/31] memory manager: add memory manager policy to defaulter
 and conversion files

Signed-off-by: Artyom Lukianov
---
 pkg/kubelet/apis/config/v1beta1/defaults.go | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pkg/kubelet/apis/config/v1beta1/defaults.go b/pkg/kubelet/apis/config/v1beta1/defaults.go
index 3b46d4164b32..c45764ae5e97 100644
--- a/pkg/kubelet/apis/config/v1beta1/defaults.go
+++ b/pkg/kubelet/apis/config/v1beta1/defaults.go
@@ -23,6 +23,7 @@ import (
 	kruntime "k8s.io/apimachinery/pkg/runtime"
 	componentbaseconfigv1alpha1 "k8s.io/component-base/config/v1alpha1"
 	kubeletconfigv1beta1 "k8s.io/kubelet/config/v1beta1"
+	// TODO: Cut references to k8s.io/kubernetes, eventually there should be none from this package
 	"k8s.io/kubernetes/pkg/cluster/ports"
 	"k8s.io/kubernetes/pkg/kubelet/qos"
@@ -154,6 +155,9 @@ func SetDefaults_KubeletConfiguration(obj *kubeletconfigv1beta1.KubeletConfigura
 		// Keep the same as default NodeStatusUpdateFrequency
 		obj.CPUManagerReconcilePeriod = metav1.Duration{Duration: 10 * time.Second}
 	}
+	if obj.MemoryManagerPolicy == "" {
+		obj.MemoryManagerPolicy = kubeletconfigv1beta1.NoneMemoryManagerPolicy
+	}
 	if obj.TopologyManagerPolicy == "" {
 		obj.TopologyManagerPolicy = kubeletconfigv1beta1.NoneTopologyManagerPolicy
 	}

From abb94bec518dd36c1ea8b405bc21c9a220d91c33 Mon Sep 17 00:00:00 2001
From: Artyom Lukianov
Date: Sun, 29 Mar 2020 17:37:57 +0300
Subject: [PATCH 13/31] memory manager: implement the memory manager static
 policy

- The `Allocate` method will try to allocate the memory according to the
  affinity hints saved under the `TopologyManager` store. If the store does
  not have any hints for the memory, it will call `getDefaultHint` to get the
  default hint. If the affinity does not satisfy the memory request, it will
  call `extendTopologyManagerHint` to extend the topology hint to satisfy the
  memory request. Once it has the preferred hint, it will allocate the memory
  and update the memory manager state accordingly.
- The `RemoveContainer` method will release the allocated memory and update
  the memory manager state accordingly.
- The `GetTopologyHints` method will try to re-generate topology hints when
  the container is already present in the memory manager state. If it is not
  present, it will call `calculateHints` to get topology hints.

The `calculateHints` method uses an approach similar to the one used under
the CPU manager:
1. If the container memory request can be satisfied by a single NUMA node,
   the memory will not be allocated from more than one NUMA node, and only
   single NUMA node hints will be marked as preferred. This can affect the
   density, but it gives us guarantees regarding the NUMA alignment.
2. A NUMA node used in a multi NUMA assignment cannot be used in a single
   NUMA assignment, and a NUMA node used in a single NUMA assignment cannot
   be used in a multi NUMA assignment.
3. Only hints whose NUMA nodes have enough memory will be returned.
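
To make the hint calculation above concrete, here is a minimal,
self-contained sketch of the idea (illustrative only: the numaNode and
topologyHint types and this simplified calculateHints are invented for the
example; the real implementation below works on the machine state, checks
every requested memory type, and additionally enforces the grouping rules
from point 2):

    // hintsketch models the calculateHints idea: enumerate NUMA node
    // subsets, keep those with enough free memory, and prefer only the
    // subsets of minimal size.
    package main

    import "fmt"

    type numaNode struct {
        id   int
        free uint64 // free memory on the node, in bytes
    }

    type topologyHint struct {
        nodes     []int
        preferred bool
    }

    func calculateHints(nodes []numaNode, request uint64) []topologyHint {
        var hints []topologyHint
        minSize := len(nodes) + 1

        // iterate over every non-empty subset of NUMA nodes
        for mask := 1; mask < 1<<len(nodes); mask++ {
            var free uint64
            var ids []int
            for i, n := range nodes {
                if mask&(1<<i) != 0 {
                    free += n.free
                    ids = append(ids, n.id)
                }
            }
            if free < request {
                continue // this subset does not have enough free memory
            }
            hints = append(hints, topologyHint{nodes: ids})
            if len(ids) < minSize {
                minSize = len(ids)
            }
        }

        // prefer only the hints that use the minimal number of NUMA nodes
        for i := range hints {
            hints[i].preferred = len(hints[i].nodes) == minSize
        }
        return hints
    }

    func main() {
        nodes := []numaNode{{id: 0, free: 2 << 30}, {id: 1, free: 4 << 30}}
        for _, h := range calculateHints(nodes, 3<<30) {
            fmt.Printf("nodes=%v preferred=%v\n", h.nodes, h.preferred)
        }
        // nodes=[1] preferred=true
        // nodes=[0 1] preferred=false
    }

For two nodes with 2 GiB and 4 GiB free and a 3 GiB request, only node 1
alone is preferred, matching point 1: the request fits on a single NUMA
node, so the multi NUMA hint is kept but not preferred.
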
Signed-off-by: Artyom Lukianov
---
 .../cm/memorymanager/memory_manager.go        |   1 -
 pkg/kubelet/cm/memorymanager/policy_static.go | 633 +++++++++++++++++-
 2 files changed, 630 insertions(+), 4 deletions(-)

diff --git a/pkg/kubelet/cm/memorymanager/memory_manager.go b/pkg/kubelet/cm/memorymanager/memory_manager.go
index 59d19e618c39..0b711244c479 100644
--- a/pkg/kubelet/cm/memorymanager/memory_manager.go
+++ b/pkg/kubelet/cm/memorymanager/memory_manager.go
@@ -254,7 +254,6 @@ func (m *manager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[str
 	return m.policy.GetTopologyHints(m.state, pod, container)
 }
 
-// TODO: consider to move this method to manager interface, the only difference between CPU manager is assignments, we can send it to the method
 func (m *manager) removeStaleState() {
 	// Only once all sources are ready do we attempt to remove any stale state.
 	// This ensures that the call to `m.activePods()` below will succeed with
diff --git a/pkg/kubelet/cm/memorymanager/policy_static.go b/pkg/kubelet/cm/memorymanager/policy_static.go
index 7a053880d6e3..5cee6851448b 100644
--- a/pkg/kubelet/cm/memorymanager/policy_static.go
+++ b/pkg/kubelet/cm/memorymanager/policy_static.go
@@ -17,18 +17,27 @@ limitations under the License.
 package memorymanager
 
 import (
+	"fmt"
+	"reflect"
+	"sort"
+
 	cadvisorapi "github.com/google/cadvisor/info/v1"
 	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	"k8s.io/klog/v2"
+	corehelper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
+	v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
 	"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state"
 	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
+	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
 )
 
 const policyTypeStatic policyType = "static"
 
 type systemReservedMemory map[int]map[v1.ResourceName]uint64
 
-// staticPolicy is implementation of the policy interface for the single NUMA policy
+// SingleNUMAPolicy is implementation of the policy interface for the single NUMA policy
 type staticPolicy struct {
 	// machineInfo contains machine memory related information
 	machineInfo *cadvisorapi.MachineInfo
@@ -41,8 +50,25 @@ type staticPolicy struct {
 var _ Policy = &staticPolicy{}
 
 // NewPolicyStatic returns new single NUMA policy instance
-func NewPolicyStatic() Policy {
-	return &staticPolicy{}
+func NewPolicyStatic(machineInfo *cadvisorapi.MachineInfo, reserved systemReservedMemory, affinity topologymanager.Store) (Policy, error) {
+	var totalSystemReserved uint64
+	for _, node := range reserved {
+		if _, ok := node[v1.ResourceMemory]; !ok {
+			continue
+		}
+		totalSystemReserved += node[v1.ResourceMemory]
+	}
+
+	// check if we have some reserved memory for the system
+	if totalSystemReserved <= 0 {
+		return nil, fmt.Errorf("[memorymanager] you should specify the system reserved memory")
+	}
+
+	return &staticPolicy{
+		machineInfo:    machineInfo,
+		systemReserved: reserved,
+		affinity:       affinity,
+	}, nil
 }
 
 func (p *staticPolicy) Name() string {
@@ -50,16 +76,165 @@ func (p *staticPolicy) Name() string {
 }
 
 func (p *staticPolicy) Start(s state.State) error {
+	if err := p.validateState(s); err != nil {
+		klog.Errorf("[memorymanager] Invalid state: %v, please drain node and remove policy state file", err)
+		return err
+	}
 	return nil
 }
 
 // Allocate call is idempotent
 func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) error {
+	// allocate the memory only for guaranteed pods
+	if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed {
+		return nil
+	}
+
klog.Infof("[memorymanager] Allocate (pod: %s, container: %s)", pod.Name, container.Name) + if blocks := s.GetMemoryBlocks(string(pod.UID), container.Name); blocks != nil { + klog.Infof("[memorymanager] Container already present in state, skipping (pod: %s, container: %s)", pod.Name, container.Name) + return nil + } + + // Call Topology Manager to get the aligned affinity across all hint providers. + hint := p.affinity.GetAffinity(string(pod.UID), container.Name) + klog.Infof("[memorymanager] Pod %v, Container %v Topology Affinity is: %v", pod.UID, container.Name, hint) + + requestedResources, err := getRequestedResources(container) + if err != nil { + return err + } + + bestHint := &hint + // topology manager returned the hint with NUMA affinity nil + // we should use the default NUMA affinity calculated the same way as for the topology manager + if hint.NUMANodeAffinity == nil { + defaultHint, err := p.getDefaultHint(s, requestedResources) + if err != nil { + return err + } + + if !defaultHint.Preferred && bestHint.Preferred { + return fmt.Errorf("[memorymanager] failed to find the default preferred hint") + } + bestHint = defaultHint + } + + machineState := s.GetMachineState() + + // topology manager returns the hint that does not satisfy completely the container request + // we should extend this hint to the one who will satisfy the request and include the current hint + if !isAffinitySatisfyRequest(machineState, bestHint.NUMANodeAffinity, requestedResources) { + extendedHint, err := p.extendTopologyManagerHint(s, requestedResources, bestHint.NUMANodeAffinity) + if err != nil { + return err + } + + if !extendedHint.Preferred && bestHint.Preferred { + return fmt.Errorf("[memorymanager] failed to find the extended preferred hint") + } + bestHint = extendedHint + } + + var containerBlocks []state.Block + maskBits := bestHint.NUMANodeAffinity.GetBits() + for resourceName, requestedSize := range requestedResources { + // update memory blocks + containerBlocks = append(containerBlocks, state.Block{ + NUMAAffinity: maskBits, + Size: requestedSize, + Type: resourceName, + }) + + // Update nodes memory state + for _, nodeId := range maskBits { + machineState[nodeId].NumberOfAssignments++ + machineState[nodeId].Nodes = maskBits + + // we need to continue to update all affinity mask nodes + if requestedSize == 0 { + continue + } + + // update the node memory state + nodeResourceMemoryState := machineState[nodeId].MemoryMap[resourceName] + if nodeResourceMemoryState.Free <= 0 { + continue + } + + // the node has enough memory to satisfy the request + if nodeResourceMemoryState.Free >= requestedSize { + nodeResourceMemoryState.Reserved += requestedSize + nodeResourceMemoryState.Free -= requestedSize + requestedSize = 0 + continue + } + + // the node does not have enough memory, use the node remaining memory and move to the next node + requestedSize -= nodeResourceMemoryState.Free + nodeResourceMemoryState.Reserved += nodeResourceMemoryState.Free + nodeResourceMemoryState.Free = 0 + } + } + + s.SetMachineState(machineState) + s.SetMemoryBlocks(string(pod.UID), container.Name, containerBlocks) + return nil } // RemoveContainer call is idempotent func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerName string) error { + klog.Infof("[memorymanager] RemoveContainer (pod: %s, container: %s)", podUID, containerName) + blocks := s.GetMemoryBlocks(podUID, containerName) + if blocks == nil { + return nil + } + + s.Delete(podUID, containerName) + + // Mutate machine memory state to 
+	machineState := s.GetMachineState()
+	for _, b := range blocks {
+		releasedSize := b.Size
+		for _, nodeId := range b.NUMAAffinity {
+			machineState[nodeId].NumberOfAssignments--
+
+			// once we do not have any memory allocations on this node, clear node groups
+			if machineState[nodeId].NumberOfAssignments == 0 {
+				machineState[nodeId].Nodes = []int{nodeId}
+			}
+
+			// we still need to pass over all NUMA node under the affinity mask to update them
+			if releasedSize == 0 {
+				continue
+			}
+
+			nodeResourceMemoryState := machineState[nodeId].MemoryMap[b.Type]
+
+			// if the node does not have reserved memory to free, continue to the next node
+			if nodeResourceMemoryState.Reserved == 0 {
+				continue
+			}
+
+			// the reserved memory is smaller than the amount of memory that should be released,
+			// release as much as possible and move to the next node
+			if nodeResourceMemoryState.Reserved < releasedSize {
+				releasedSize -= nodeResourceMemoryState.Reserved
+				nodeResourceMemoryState.Free += nodeResourceMemoryState.Reserved
+				nodeResourceMemoryState.Reserved = 0
+				continue
+			}
+
+			// the reserved memory is big enough to satisfy the released memory
+			nodeResourceMemoryState.Free += releasedSize
+			nodeResourceMemoryState.Reserved -= releasedSize
+			releasedSize = 0
+		}
+	}
+
+	s.SetMachineState(machineState)
+
 	return nil
 }
 
@@ -67,5 +242,457 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa
 // GetTopologyHints implements the topologymanager.HintProvider Interface
 // and is consulted to achieve NUMA aware resource alignment among this
 // and other resource controllers.
 func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
+	if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed {
+		return nil
+	}
+
+	requestedResources, err := getRequestedResources(container)
+	if err != nil {
+		klog.Error(err.Error())
+		return nil
+	}
+
+	hints := map[string][]topologymanager.TopologyHint{}
+	for resourceName := range requestedResources {
+		hints[string(resourceName)] = []topologymanager.TopologyHint{}
+	}
+
+	containerBlocks := s.GetMemoryBlocks(string(pod.UID), container.Name)
+	// Short circuit to regenerate the same hints if there are already
+	// memory allocated for the container. This might happen after a
+	// kubelet restart, for example.
+	if containerBlocks != nil {
+		if len(containerBlocks) != len(requestedResources) {
+			klog.Errorf("[memorymanager] The number of requested resources by the container %s differs from the number of memory blocks", container.Name)
+			return nil
+		}
+
+		for _, b := range containerBlocks {
+			if _, ok := requestedResources[b.Type]; !ok {
+				klog.Errorf("[memorymanager] Container %s requested resources do not have resource of type %s", container.Name, b.Type)
+				return nil
+			}
+
+			if b.Size != requestedResources[b.Type] {
+				klog.Errorf("[memorymanager] Memory %s already allocated to (pod %v, container %v) with different number than request: requested: %d, allocated: %d", b.Type, pod.UID, container.Name, requestedResources[b.Type], b.Size)
+				return nil
+			}
+
+			containerNUMAAffinity, err := bitmask.NewBitMask(b.NUMAAffinity...)
+			if err != nil {
+				klog.Errorf("[memorymanager] failed to generate NUMA bitmask: %v", err)
+				return nil
+			}
+
+			klog.Infof("[memorymanager] Regenerating TopologyHints, %s was already allocated to (pod %v, container %v)", b.Type, pod.UID, container.Name)
+			hints[string(b.Type)] = append(hints[string(b.Type)], topologymanager.TopologyHint{
+				NUMANodeAffinity: containerNUMAAffinity,
+				Preferred:        true,
+			})
+		}
+		return hints
+	}
+
+	return p.calculateHints(s, requestedResources)
+}
+
+func getRequestedResources(container *v1.Container) (map[v1.ResourceName]uint64, error) {
+	requestedResources := map[v1.ResourceName]uint64{}
+	for resourceName, quantity := range container.Resources.Requests {
+		if resourceName != v1.ResourceMemory && !corehelper.IsHugePageResourceName(resourceName) {
+			continue
+		}
+		requestedSize, succeed := quantity.AsInt64()
+		if !succeed {
+			return nil, fmt.Errorf("[memorymanager] failed to represent quantity as int64")
+		}
+		requestedResources[resourceName] = uint64(requestedSize)
+	}
+	return requestedResources, nil
+}
+
+func (p *staticPolicy) calculateHints(s state.State, requestedResources map[v1.ResourceName]uint64) map[string][]topologymanager.TopologyHint {
+	machineState := s.GetMachineState()
+	var numaNodes []int
+	for n := range machineState {
+		numaNodes = append(numaNodes, n)
+	}
+	sort.Ints(numaNodes)
+
+	// Initialize minAffinitySize to include all NUMA Nodes.
+	minAffinitySize := len(numaNodes)
+
+	hints := map[string][]topologymanager.TopologyHint{}
+	bitmask.IterateBitMasks(numaNodes, func(mask bitmask.BitMask) {
+		maskBits := mask.GetBits()
+		singleNUMAHint := len(maskBits) == 1
+
+		// the node is already in a group with another node, it cannot be used for the single NUMA node allocation
+		if singleNUMAHint && len(machineState[maskBits[0]].Nodes) > 1 {
+			return
+		}
+
+		totalFreeSize := map[v1.ResourceName]uint64{}
+		totalAllocatableSize := map[v1.ResourceName]uint64{}
+		// calculate total free memory for the node mask
+		for _, nodeID := range maskBits {
+			// the node is already used for memory allocation
+			if !singleNUMAHint && machineState[nodeID].NumberOfAssignments > 0 {
+				// a node used for the single NUMA memory allocation cannot be used for the multi NUMA node allocation
+				if len(machineState[nodeID].Nodes) == 1 {
+					return
+				}
+
+				// the node is already used with a different group of nodes, it cannot be used within the current hint
+				if !areGroupsEqual(machineState[nodeID].Nodes, maskBits) {
+					return
+				}
+			}
+
+			for resourceName := range requestedResources {
+				if _, ok := totalFreeSize[resourceName]; !ok {
+					totalFreeSize[resourceName] = 0
+				}
+				totalFreeSize[resourceName] += machineState[nodeID].MemoryMap[resourceName].Free
+
+				if _, ok := totalAllocatableSize[resourceName]; !ok {
+					totalAllocatableSize[resourceName] = 0
+				}
+				totalAllocatableSize[resourceName] += machineState[nodeID].MemoryMap[resourceName].Allocatable
+			}
+		}
+
+		// verify that for all memory types the node mask has enough allocatable resources
+		for resourceName, requestedSize := range requestedResources {
+			if totalAllocatableSize[resourceName] < requestedSize {
+				return
+			}
+		}
+
+		// set the minimum number of NUMA nodes that can satisfy the container resource requests
+		if mask.Count() < minAffinitySize {
+			minAffinitySize = mask.Count()
+		}
+
+		// verify that for all memory types the node mask has enough free resources
+		for resourceName, requestedSize := range requestedResources {
+			if totalFreeSize[resourceName] < requestedSize {
+				return
+			}
+		}
+
+		// add the node mask as topology hint for all memory types
+		for resourceName := range requestedResources {
+			if _, ok := hints[string(resourceName)]; !ok {
+				hints[string(resourceName)] = []topologymanager.TopologyHint{}
+			}
+			hints[string(resourceName)] = append(hints[string(resourceName)], topologymanager.TopologyHint{
+				NUMANodeAffinity: mask,
+				Preferred:        false,
+			})
+		}
+	})
+
+	// update the 'Preferred' field of the hints; by default we prefer the hints
+	// with the minimal number of NUMA nodes
+	for resourceName := range requestedResources {
+		for i, hint := range hints[string(resourceName)] {
+			hints[string(resourceName)][i].Preferred = p.isHintPreferred(hint.NUMANodeAffinity.GetBits(), minAffinitySize)
+		}
+	}
+
+	return hints
+}
+
+func (p *staticPolicy) isHintPreferred(maskBits []int, minAffinitySize int) bool {
+	return len(maskBits) == minAffinitySize
+}
+
+func areGroupsEqual(group1, group2 []int) bool {
+	sort.Ints(group1)
+	sort.Ints(group2)
+
+	if len(group1) != len(group2) {
+		return false
+	}
+
+	for i, elm := range group1 {
+		if group2[i] != elm {
+			return false
+		}
+	}
+	return true
+}
+
+func (p *staticPolicy) validateState(s state.State) error {
+	machineState := s.GetMachineState()
+	memoryAssignments := s.GetMemoryAssignments()
+
+	if len(machineState) == 0 {
+		// Machine state cannot be empty when assignments exist
+		if len(memoryAssignments) != 0 {
+			return fmt.Errorf("[memorymanager] machine state cannot be empty when it has memory assignments")
+		}
+
+		defaultMachineState := p.getDefaultMachineState()
+		s.SetMachineState(defaultMachineState)
+
+		return nil
+	}
+
+	// calculate all memory assigned to containers
+	expectedMachineState := p.getDefaultMachineState()
+	for pod, container := range memoryAssignments {
+		for containerName, blocks := range container {
+			for _, b := range blocks {
+				requestedSize := b.Size
+				for _, nodeID := range b.NUMAAffinity {
+					nodeState, ok := expectedMachineState[nodeID]
+					if !ok {
+						return fmt.Errorf("[memorymanager] (pod: %s, container: %s) the memory assignment uses a NUMA node that does not exist", pod, containerName)
+					}
+
+					nodeState.NumberOfAssignments++
+					nodeState.Nodes = b.NUMAAffinity
+
+					memoryState, ok := nodeState.MemoryMap[b.Type]
+					if !ok {
+						return fmt.Errorf("[memorymanager] (pod: %s, container: %s) the memory assignment uses a memory resource that does not exist", pod, containerName)
+					}
+
+					if requestedSize == 0 {
+						continue
+					}
+
+					// this node does not have any free memory, continue to the next one
+					if memoryState.Free <= 0 {
+						continue
+					}
+
+					// the node has enough memory to satisfy the request
+					if memoryState.Free >= requestedSize {
+						memoryState.Reserved += requestedSize
+						memoryState.Free -= requestedSize
+						requestedSize = 0
+						continue
+					}
+
+					// the node does not have enough memory, use the node remaining memory and move to the next node
+					requestedSize -= memoryState.Free
+					memoryState.Reserved += memoryState.Free
+					memoryState.Free = 0
+				}
+			}
+		}
+	}
+
+	// State has already been initialized from file (is not empty)
+	// Validate that the total size, system reserved and reserved memory have not changed; this can happen when:
+	// - a physical memory bank was added to or removed from the node
+	// - the kubelet system-reserved, kube-reserved or pre-reserved-memory-zone parameters were changed
+	if !areMachineStatesEqual(machineState, expectedMachineState) {
+		return fmt.Errorf("[memorymanager] the expected machine state is different from the real one")
+	}
+
+	return nil
+}
+
+func areMachineStatesEqual(ms1, ms2 state.NodeMap) bool {
+	if len(ms1) != len(ms2) {
+		klog.Errorf("[memorymanager] node states are different len(ms1) != len(ms2): %d != %d", len(ms1), len(ms2))
+		return false
+	}
+
+	for nodeId, nodeState1 := range ms1 {
+		nodeState2, ok := ms2[nodeId]
+		if !ok {
+			klog.Errorf("[memorymanager] node state does not have node ID %d", nodeId)
+			return false
+		}
+
+		if nodeState1.NumberOfAssignments != nodeState2.NumberOfAssignments {
+			klog.Errorf("[memorymanager] node states number of assignments are different: %d != %d", nodeState1.NumberOfAssignments, nodeState2.NumberOfAssignments)
+			return false
+		}
+
+		if !areGroupsEqual(nodeState1.Nodes, nodeState2.Nodes) {
+			klog.Errorf("[memorymanager] node states groups are different: %v != %v", nodeState1.Nodes, nodeState2.Nodes)
+			return false
+		}
+
+		if len(nodeState1.MemoryMap) != len(nodeState2.MemoryMap) {
+			klog.Errorf("[memorymanager] node states memory maps have different lengths: %d != %d", len(nodeState1.MemoryMap), len(nodeState2.MemoryMap))
+			return false
+		}
+
+		for resourceName, memoryState1 := range nodeState1.MemoryMap {
+			memoryState2, ok := nodeState2.MemoryMap[resourceName]
+			if !ok {
+				klog.Errorf("[memorymanager] memory state does not have resource %s", resourceName)
+				return false
+			}
+
+			if !reflect.DeepEqual(*memoryState1, *memoryState2) {
+				klog.Errorf("[memorymanager] memory states for the NUMA node %d and the resource %s are different: %+v != %+v", nodeId, resourceName, *memoryState1, *memoryState2)
+				return false
+			}
+		}
+	}
+	return true
+}
+
+func (p *staticPolicy) getDefaultMachineState() state.NodeMap {
+	defaultMachineState := state.NodeMap{}
+	nodeHugepages := map[int]uint64{}
+	for _, node := range p.machineInfo.Topology {
+		defaultMachineState[node.Id] = &state.NodeState{
+			NumberOfAssignments: 0,
+			MemoryMap:           map[v1.ResourceName]*state.MemoryTable{},
+			Nodes:               []int{node.Id},
+		}
+
+		// fill memory table with huge pages values
+		for _, hugepage := range node.HugePages {
+			hugepageQuantity := resource.NewQuantity(int64(hugepage.PageSize)*1024, resource.BinarySI)
+			resourceName := corehelper.HugePageResourceName(*hugepageQuantity)
+			systemReserved := p.getResourceSystemReserved(node.Id, resourceName)
+			totalHugepagesSize := hugepage.NumPages * hugepage.PageSize * 1024
+			allocatable := totalHugepagesSize - systemReserved
+			defaultMachineState[node.Id].MemoryMap[resourceName] = &state.MemoryTable{
+				Allocatable:    allocatable,
+				Free:           allocatable,
+				Reserved:       0,
+				SystemReserved: systemReserved,
+				TotalMemSize:   totalHugepagesSize,
+			}
+			if _, ok := nodeHugepages[node.Id]; !ok {
+				nodeHugepages[node.Id] = 0
+			}
+			nodeHugepages[node.Id] += totalHugepagesSize
+		}
+
+		// fill memory table with regular memory values
+		systemReserved := p.getResourceSystemReserved(node.Id, v1.ResourceMemory)
+
+		allocatable := node.Memory - systemReserved
+		// remove memory allocated by hugepages
+		if allocatedByHugepages, ok := nodeHugepages[node.Id]; ok {
+			allocatable -= allocatedByHugepages
+		}
+		defaultMachineState[node.Id].MemoryMap[v1.ResourceMemory] = &state.MemoryTable{
+			Allocatable:    allocatable,
+			Free:           allocatable,
+			Reserved:       0,
+			SystemReserved: systemReserved,
+			TotalMemSize:   node.Memory,
+		}
+	}
+	return defaultMachineState
+}
+
+func (p *staticPolicy) getResourceSystemReserved(nodeId int, resourceName v1.ResourceName) uint64 {
+	var systemReserved uint64
+	if nodeSystemReserved, ok := p.systemReserved[nodeId]; ok {
+		if nodeMemorySystemReserved, ok := nodeSystemReserved[resourceName]; ok {
+			systemReserved = nodeMemorySystemReserved
+		}
+	}
+	return systemReserved
+}
+
+func (p *staticPolicy) getDefaultHint(s state.State, requestedResources map[v1.ResourceName]uint64) (*topologymanager.TopologyHint, error) {
+	hints := p.calculateHints(s, requestedResources)
+	if len(hints) < 1 {
+		return nil, fmt.Errorf("[memorymanager] failed to get the default NUMA affinity, no NUMA nodes with enough memory are available")
+	}
+
+	// hints for all memory types should be the same, so we will check hints only for regular memory type
+	return findBestHint(hints[string(v1.ResourceMemory)]), nil
+}
+
+func isAffinitySatisfyRequest(machineState state.NodeMap, mask bitmask.BitMask, requestedResources map[v1.ResourceName]uint64) bool {
+	totalFreeSize := map[v1.ResourceName]uint64{}
+	for _, nodeId := range mask.GetBits() {
+		for resourceName := range requestedResources {
+			if _, ok := totalFreeSize[resourceName]; !ok {
+				totalFreeSize[resourceName] = 0
+			}
+			totalFreeSize[resourceName] += machineState[nodeId].MemoryMap[resourceName].Free
+		}
+	}
+
+	// verify that for all memory types the node mask has enough resources
+	for resourceName, requestedSize := range requestedResources {
+		if totalFreeSize[resourceName] < requestedSize {
+			return false
+		}
+	}
+
+	return true
+}
+
+// extendTopologyManagerHint extends the topology manager hint when it does not satisfy the container request.
+// The topology manager uses bitwise AND to merge all topology hints into the best one, so in case of the restricted policy,
+// it is possible that we will get a subset of the hint that we provided to the topology manager; in this case we want to
+// extend it to the original one
+func (p *staticPolicy) extendTopologyManagerHint(s state.State, requestedResources map[v1.ResourceName]uint64, mask bitmask.BitMask) (*topologymanager.TopologyHint, error) {
+	hints := p.calculateHints(s, requestedResources)
+
+	var filteredHints []topologymanager.TopologyHint
+	// hints for all memory types should be the same, so we will check hints only for regular memory type
+	for _, hint := range hints[string(v1.ResourceMemory)] {
+		affinityBits := hint.NUMANodeAffinity.GetBits()
+		// filter out all hints that do not include the current hint
+		if isHintInGroup(mask.GetBits(), affinityBits) {
+			filteredHints = append(filteredHints, hint)
+		}
+	}
+
+	if len(filteredHints) < 1 {
+		return nil, fmt.Errorf("[memorymanager] failed to find NUMA nodes to extend the current topology hint")
+	}
+
+	// try to find the preferred hint with the minimal number of NUMA nodes, relevant for the restricted policy
+	return findBestHint(filteredHints), nil
+}
+
+func isHintInGroup(hint []int, group []int) bool {
+	sort.Ints(hint)
+	sort.Ints(group)
+
+	hintIndex := 0
+	for i := range group {
+		if hintIndex == len(hint) {
+			return true
+		}
+
+		if group[i] != hint[hintIndex] {
+			continue
+		}
+		hintIndex++
+	}
+	return hintIndex == len(hint)
+}
+
+func findBestHint(hints []topologymanager.TopologyHint) *topologymanager.TopologyHint {
+	// try to find the preferred hint with the minimal number of NUMA nodes, relevant for the restricted policy
+	bestHint := topologymanager.TopologyHint{}
+	for _, hint := range hints {
+		if bestHint.NUMANodeAffinity == nil {
+			bestHint = hint
+			continue
+		}
+
+		// the current hint is preferred while the best hint so far is not
+		if hint.Preferred && !bestHint.Preferred {
+			bestHint = hint
+			continue
+		}
+
+		// both hints have the same preferred value, but the current hint has fewer NUMA nodes than the best one
+		if
hint.Preferred == bestHint.Preferred && hint.NUMANodeAffinity.IsNarrowerThan(bestHint.NUMANodeAffinity) { + bestHint = hint + } + } + return &bestHint +} From 18c8a821e0fe4bf46dffded61486a816d6a0e1da Mon Sep 17 00:00:00 2001 From: Pawel Rapacz Date: Fri, 25 Sep 2020 15:53:34 +0200 Subject: [PATCH 14/31] memory manager: implement GetPodTopologyHints method It will return memory and hugepages hints for the whole pod. Signed-off-by: Pawel Rapacz --- .../cm/memorymanager/fake_memory_manager.go | 9 +- .../cm/memorymanager/memory_manager.go | 13 ++ .../cm/memorymanager/memory_manager_test.go | 67 ++++++- pkg/kubelet/cm/memorymanager/policy.go | 4 + pkg/kubelet/cm/memorymanager/policy_none.go | 7 + pkg/kubelet/cm/memorymanager/policy_static.go | 173 ++++++++++++------ 6 files changed, 219 insertions(+), 54 deletions(-) diff --git a/pkg/kubelet/cm/memorymanager/fake_memory_manager.go b/pkg/kubelet/cm/memorymanager/fake_memory_manager.go index 334476f702ee..1cbe0171013f 100644 --- a/pkg/kubelet/cm/memorymanager/fake_memory_manager.go +++ b/pkg/kubelet/cm/memorymanager/fake_memory_manager.go @@ -18,8 +18,8 @@ package memorymanager import ( v1 "k8s.io/api/core/v1" - "k8s.io/klog" - "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/containermap" + "k8s.io/klog/v2" + "k8s.io/kubernetes/pkg/kubelet/cm/containermap" "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/config" @@ -60,6 +60,11 @@ func (m *fakeManager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map return map[string][]topologymanager.TopologyHint{} } +func (m *fakeManager) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint { + klog.Infof("[fake memorymanager] Get Pod Topology Hints") + return map[string][]topologymanager.TopologyHint{} +} + func (m *fakeManager) State() state.Reader { return m.state } diff --git a/pkg/kubelet/cm/memorymanager/memory_manager.go b/pkg/kubelet/cm/memorymanager/memory_manager.go index 0b711244c479..3f906ac68f99 100644 --- a/pkg/kubelet/cm/memorymanager/memory_manager.go +++ b/pkg/kubelet/cm/memorymanager/memory_manager.go @@ -76,6 +76,11 @@ type Manager interface { // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. GetTopologyHints(*v1.Pod, *v1.Container) map[string][]topologymanager.TopologyHint + + // GetPodTopologyHints implements the topologymanager.HintProvider Interface + // and is consulted to achieve NUMA aware resource alignment among this + // and other resource controllers. 
+ GetPodTopologyHints(*v1.Pod) map[string][]topologymanager.TopologyHint } type manager struct { @@ -246,6 +251,14 @@ func (m *manager) State() state.Reader { return m.state } +// GetPodTopologyHints returns the topology hints for the topology manager +func (m *manager) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint { + // Garbage collect any stranded resources before providing TopologyHints + m.removeStaleState() + // Delegate to active policy + return m.policy.GetPodTopologyHints(m.state, pod) +} + // GetTopologyHints returns the topology hints for the topology manager func (m *manager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { // Garbage collect any stranded resources before providing TopologyHints diff --git a/pkg/kubelet/cm/memorymanager/memory_manager_test.go b/pkg/kubelet/cm/memorymanager/memory_manager_test.go index ae6ec42e6ad0..aed29893d888 100644 --- a/pkg/kubelet/cm/memorymanager/memory_manager_test.go +++ b/pkg/kubelet/cm/memorymanager/memory_manager_test.go @@ -11,6 +11,10 @@ import ( info "github.com/google/cadvisor/info/v1" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/types" + runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" + "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" + "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" ) const ( @@ -20,7 +24,67 @@ const ( type nodeResources map[v1.ResourceName]resource.Quantity -// validateReservedMemory +type mockPolicy struct { + err error +} + +func (p *mockPolicy) Name() string { + return string(policyTypeMock) +} + +func (p *mockPolicy) Start(s state.State) error { + return p.err +} + +func (p *mockPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) error { + return p.err +} + +func (p *mockPolicy) RemoveContainer(s state.State, podUID string, containerName string) error { + return p.err +} + +func (p *mockPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { + return nil +} + +func (p *mockPolicy) GetPodTopologyHints(s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint { + return nil +} + +type mockRuntimeService struct { + err error +} + +func (rt mockRuntimeService) UpdateContainerResources(id string, resources *runtimeapi.LinuxContainerResources) error { + return rt.err +} + +type mockPodStatusProvider struct { + podStatus v1.PodStatus + found bool +} + +func (psp mockPodStatusProvider) GetPodStatus(uid types.UID) (v1.PodStatus, bool) { + return psp.podStatus, psp.found +} + +func getPod(podUID string, containerName string, requirements *v1.ResourceRequirements) *v1.Pod { + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + UID: types.UID(podUID), + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: containerName, + Resources: *requirements, + }, + }, + }, + } +} + func TestValidatePreReservedMemory(t *testing.T) { const msgNotEqual = "the total amount of memory of type \"%s\" is not equal to the value determined by Node Allocatable feature" testCases := []struct { @@ -106,6 +170,7 @@ func TestValidatePreReservedMemory(t *testing.T) { } } +// validateReservedMemory func TestConvertPreReserved(t *testing.T) { machineInfo := info.MachineInfo{ Topology: []info.Node{ diff --git a/pkg/kubelet/cm/memorymanager/policy.go b/pkg/kubelet/cm/memorymanager/policy.go index dea23b335e35..8d84c71f137c 100644 --- a/pkg/kubelet/cm/memorymanager/policy.go +++ 
b/pkg/kubelet/cm/memorymanager/policy.go @@ -37,4 +37,8 @@ type Policy interface { // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. GetTopologyHints(s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint + // GetPodTopologyHints implements the topologymanager.HintProvider Interface + // and is consulted to achieve NUMA aware resource alignment among this + // and other resource controllers. + GetPodTopologyHints(s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint } diff --git a/pkg/kubelet/cm/memorymanager/policy_none.go b/pkg/kubelet/cm/memorymanager/policy_none.go index e91c3ce3439d..a8b6b778520a 100644 --- a/pkg/kubelet/cm/memorymanager/policy_none.go +++ b/pkg/kubelet/cm/memorymanager/policy_none.go @@ -59,3 +59,10 @@ func (p *none) RemoveContainer(s state.State, podUID string, containerName strin func (p *none) GetTopologyHints(s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { return nil } + +// GetPodTopologyHints implements the topologymanager.HintProvider Interface +// and is consulted to achieve NUMA aware resource alignment among this +// and other resource controllers. +func (p *none) GetPodTopologyHints(s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint { + return nil +} diff --git a/pkg/kubelet/cm/memorymanager/policy_static.go b/pkg/kubelet/cm/memorymanager/policy_static.go index 5cee6851448b..c26b4efb2652 100644 --- a/pkg/kubelet/cm/memorymanager/policy_static.go +++ b/pkg/kubelet/cm/memorymanager/policy_static.go @@ -147,9 +147,9 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai }) // Update nodes memory state - for _, nodeId := range maskBits { - machineState[nodeId].NumberOfAssignments++ - machineState[nodeId].Nodes = maskBits + for _, nodeID := range maskBits { + machineState[nodeID].NumberOfAssignments++ + machineState[nodeID].Nodes = maskBits // we need to continue to update all affinity mask nodes if requestedSize == 0 { @@ -157,7 +157,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai } // update the node memory state - nodeResourceMemoryState := machineState[nodeId].MemoryMap[resourceName] + nodeResourceMemoryState := machineState[nodeID].MemoryMap[resourceName] if nodeResourceMemoryState.Free <= 0 { continue } @@ -197,12 +197,12 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa machineState := s.GetMachineState() for _, b := range blocks { releasedSize := b.Size - for _, nodeId := range b.NUMAAffinity { - machineState[nodeId].NumberOfAssignments-- + for _, nodeID := range b.NUMAAffinity { + machineState[nodeID].NumberOfAssignments-- // once we do not have any memory allocations on this node, clear node groups - if machineState[nodeId].NumberOfAssignments == 0 { - machineState[nodeId].Nodes = []int{nodeId} + if machineState[nodeID].NumberOfAssignments == 0 { + machineState[nodeID].Nodes = []int{nodeID} } // we still need to pass over all NUMA node under the affinity mask to update them @@ -210,7 +210,7 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa continue } - nodeResourceMemoryState := machineState[nodeId].MemoryMap[b.Type] + nodeResourceMemoryState := machineState[nodeID].MemoryMap[b.Type] // if the node does not have reserved memory to free, continue to the next node if nodeResourceMemoryState.Reserved == 0 { @@ -238,6 +238,110 @@ func (p 
*staticPolicy) RemoveContainer(s state.State, podUID string, containerNa return nil } +func regenerateHints(pod *v1.Pod, ctn *v1.Container, ctnBlocks []state.Block, reqRsrc map[v1.ResourceName]uint64) map[string][]topologymanager.TopologyHint { + hints := map[string][]topologymanager.TopologyHint{} + for resourceName := range reqRsrc { + hints[string(resourceName)] = []topologymanager.TopologyHint{} + } + + if len(ctnBlocks) != len(reqRsrc) { + klog.Errorf("[memorymanager] The number of requested resources by the container %s differs from the number of memory blocks", ctn.Name) + return nil + } + + for _, b := range ctnBlocks { + if _, ok := reqRsrc[b.Type]; !ok { + klog.Errorf("[memorymanager] Container %s requested resources do not have resource of type %s", ctn.Name, b.Type) + return nil + } + + if b.Size != reqRsrc[b.Type] { + klog.Errorf("[memorymanager] Memory %s already allocated to (pod %v, container %v) with different number than request: requested: %d, allocated: %d", b.Type, pod.UID, ctn.Name, reqRsrc[b.Type], b.Size) + return nil + } + + containerNUMAAffinity, err := bitmask.NewBitMask(b.NUMAAffinity...) + if err != nil { + klog.Errorf("[memorymanager] failed to generate NUMA bitmask: %v", err) + return nil + } + + klog.Infof("[memorymanager] Regenerating TopologyHints, %s was already allocated to (pod %v, container %v)", b.Type, pod.UID, ctn.Name) + hints[string(b.Type)] = append(hints[string(b.Type)], topologymanager.TopologyHint{ + NUMANodeAffinity: containerNUMAAffinity, + Preferred: true, + }) + } + return hints +} + +func getPodRequestedResources(pod *v1.Pod) (map[v1.ResourceName]uint64, error) { + reqRsrcsByInitCtrs := make(map[v1.ResourceName]uint64) + reqRsrcsByAppCtrs := make(map[v1.ResourceName]uint64) + + for _, ctr := range pod.Spec.InitContainers { + reqRsrcs, err := getRequestedResources(&ctr) + + if err != nil { + return nil, err + } + for rsrcName, qty := range reqRsrcs { + if _, ok := reqRsrcsByInitCtrs[rsrcName]; !ok { + reqRsrcsByInitCtrs[rsrcName] = uint64(0) + } + + if reqRsrcs[rsrcName] > reqRsrcsByInitCtrs[rsrcName] { + reqRsrcsByInitCtrs[rsrcName] = qty + } + } + } + + for _, ctr := range pod.Spec.Containers { + reqRsrcs, err := getRequestedResources(&ctr) + + if err != nil { + return nil, err + } + for rsrcName, qty := range reqRsrcs { + if _, ok := reqRsrcsByAppCtrs[rsrcName]; !ok { + reqRsrcsByAppCtrs[rsrcName] = uint64(0) + } + + reqRsrcsByAppCtrs[rsrcName] += qty + } + } + + for rsrcName := range reqRsrcsByAppCtrs { + if reqRsrcsByInitCtrs[rsrcName] > reqRsrcsByAppCtrs[rsrcName] { + reqRsrcsByAppCtrs[rsrcName] = reqRsrcsByInitCtrs[rsrcName] + } + } + return reqRsrcsByAppCtrs, nil +} + +func (p *staticPolicy) GetPodTopologyHints(s state.State, pod *v1.Pod) map[string][]topologymanager.TopologyHint { + if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed { + return nil + } + + reqRsrcs, err := getPodRequestedResources(pod) + if err != nil { + klog.Error(err.Error()) + return nil + } + + for _, ctn := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { + containerBlocks := s.GetMemoryBlocks(string(pod.UID), ctn.Name) + // Short circuit to regenerate the same hints if there are already + // memory allocated for the container. This might happen after a + // kubelet restart, for example. 
+ if containerBlocks != nil { + return regenerateHints(pod, &ctn, containerBlocks, reqRsrcs) + } + } + return p.calculateHints(s, reqRsrcs) +} + // GetTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. @@ -252,45 +356,12 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v return nil } - hints := map[string][]topologymanager.TopologyHint{} - for resourceName := range requestedResources { - hints[string(resourceName)] = []topologymanager.TopologyHint{} - } - containerBlocks := s.GetMemoryBlocks(string(pod.UID), container.Name) // Short circuit to regenerate the same hints if there are already // memory allocated for the container. This might happen after a // kubelet restart, for example. if containerBlocks != nil { - if len(containerBlocks) != len(requestedResources) { - klog.Errorf("[memorymanager] The number of requested resources by the container %s differs from the number of memory blocks", container.Name) - return nil - } - - for _, b := range containerBlocks { - if _, ok := requestedResources[b.Type]; !ok { - klog.Errorf("[memorymanager] Container %s requested resources do not have resource of type %s", container.Name, b.Type) - return nil - } - - if b.Size != requestedResources[b.Type] { - klog.Errorf("[memorymanager] Memory %s already allocated to (pod %v, container %v) with different number than request: requested: %d, allocated: %d", b.Type, pod.UID, container.Name, requestedResources[b.Type], b.Size) - return nil - } - - containerNUMAAffinity, err := bitmask.NewBitMask(b.NUMAAffinity...) - if err != nil { - klog.Errorf("[memorymanager] failed to generate NUMA bitmask: %v", err) - return nil - } - - klog.Infof("[memorymanager] Regenerating TopologyHints, %s was already allocated to (pod %v, container %v)", b.Type, pod.UID, container.Name) - hints[string(b.Type)] = append(hints[string(b.Type)], topologymanager.TopologyHint{ - NUMANodeAffinity: containerNUMAAffinity, - Preferred: true, - }) - } - return hints + return regenerateHints(pod, container, containerBlocks, requestedResources) } return p.calculateHints(s, requestedResources) @@ -503,10 +574,10 @@ func areMachineStatesEqual(ms1, ms2 state.NodeMap) bool { return false } - for nodeId, nodeState1 := range ms1 { - nodeState2, ok := ms2[nodeId] + for nodeID, nodeState1 := range ms1 { + nodeState2, ok := ms2[nodeID] if !ok { - klog.Errorf("[memorymanager] node state does not have node ID %d", nodeId) + klog.Errorf("[memorymanager] node state does not have node ID %d", nodeID) return false } @@ -533,7 +604,7 @@ func areMachineStatesEqual(ms1, ms2 state.NodeMap) bool { } if !reflect.DeepEqual(*memoryState1, *memoryState2) { - klog.Errorf("[memorymanager] memory states for the NUMA node %d and the resource %s are different: %+v != %+v", nodeId, resourceName, *memoryState1, *memoryState2) + klog.Errorf("[memorymanager] memory states for the NUMA node %d and the resource %s are different: %+v != %+v", nodeID, resourceName, *memoryState1, *memoryState2) return false } } @@ -590,9 +661,9 @@ func (p *staticPolicy) getDefaultMachineState() state.NodeMap { return defaultMachineState } -func (p *staticPolicy) getResourceSystemReserved(nodeId int, resourceName v1.ResourceName) uint64 { +func (p *staticPolicy) getResourceSystemReserved(nodeID int, resourceName v1.ResourceName) uint64 { var systemReserved uint64 - if nodeSystemReserved, ok := p.systemReserved[nodeId]; ok { + if 
nodeSystemReserved, ok := p.systemReserved[nodeID]; ok { if nodeMemorySystemReserved, ok := nodeSystemReserved[resourceName]; ok { systemReserved = nodeMemorySystemReserved } @@ -612,12 +683,12 @@ func (p *staticPolicy) getDefaultHint(s state.State, requestedResources map[v1.R func isAffinitySatisfyRequest(machineState state.NodeMap, mask bitmask.BitMask, requestedResources map[v1.ResourceName]uint64) bool { totalFreeSize := map[v1.ResourceName]uint64{} - for _, nodeId := range mask.GetBits() { + for _, nodeID := range mask.GetBits() { for resourceName := range requestedResources { if _, ok := totalFreeSize[resourceName]; !ok { totalFreeSize[resourceName] = 0 } - totalFreeSize[resourceName] += machineState[nodeId].MemoryMap[resourceName].Free + totalFreeSize[resourceName] += machineState[nodeID].MemoryMap[resourceName].Free } } From d7175a831c0050d481facbf04ca0da49117566e0 Mon Sep 17 00:00:00 2001 From: Krzysztof Wiatrzyk Date: Wed, 3 Jun 2020 11:05:14 +0200 Subject: [PATCH 15/31] memory manager: adding Memory Manager component unit tests Signed-off-by: Krzysztof Wiatrzyk --- .../cm/memorymanager/memory_manager_test.go | 2094 ++++++++++++++++- .../cm/memorymanager/policy_static_test.go | 1 + 2 files changed, 2075 insertions(+), 20 deletions(-) create mode 100644 pkg/kubelet/cm/memorymanager/policy_static_test.go diff --git a/pkg/kubelet/cm/memorymanager/memory_manager_test.go b/pkg/kubelet/cm/memorymanager/memory_manager_test.go index aed29893d888..40abd34447ed 100644 --- a/pkg/kubelet/cm/memorymanager/memory_manager_test.go +++ b/pkg/kubelet/cm/memorymanager/memory_manager_test.go @@ -1,18 +1,37 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + package memorymanager import ( "fmt" + "io/ioutil" + "os" "reflect" "strings" "testing" + cadvisorapi "github.com/google/cadvisor/info/v1" "github.com/stretchr/testify/assert" - - info "github.com/google/cadvisor/info/v1" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" + "k8s.io/kubernetes/pkg/kubelet/cm/containermap" "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" ) @@ -22,6 +41,47 @@ const ( hugepages1G = "hugepages-1Gi" ) +const policyTypeMock policyType = "mock" + +type testMemoryManager struct { + description string + machineInfo cadvisorapi.MachineInfo + assignments state.ContainerMemoryAssignments + expectedAssignments state.ContainerMemoryAssignments + machineState state.NodeMap + expectedMachineState state.NodeMap + expectedError error + expectedAllocateError error + expectedAddContainerError error + updateError error + removeContainerID string + nodeAllocatableReservation v1.ResourceList + policyName policyType + affinity topologymanager.Store + systemReservedMemory map[int]map[v1.ResourceName]resource.Quantity + expectedHints map[string][]topologymanager.TopologyHint + expectedReserved systemReservedMemory + reserved systemReservedMemory + podAllocate *v1.Pod + firstPod *v1.Pod + activePods []*v1.Pod +} + +func returnPolicyByName(testCase testMemoryManager) Policy { + switch policyType(testCase.policyName) { + case policyTypeMock: + return &mockPolicy{ + err: fmt.Errorf("fake reg error"), + } + case policyTypeStatic: + policy, _ := NewPolicyStatic(&testCase.machineInfo, testCase.reserved, topologymanager.NewFakeManager()) + return policy + case policyTypeNone: + return NewPolicyNone() + } + return nil +} + type nodeResources map[v1.ResourceName]resource.Quantity type mockPolicy struct { @@ -85,12 +145,12 @@ func getPod(podUID string, containerName string, requirements *v1.ResourceRequir } } -func TestValidatePreReservedMemory(t *testing.T) { +func TestValidateReservedMemory(t *testing.T) { const msgNotEqual = "the total amount of memory of type \"%s\" is not equal to the value determined by Node Allocatable feature" testCases := []struct { description string nodeAllocatableReservation v1.ResourceList - preReservedMemory map[int]map[v1.ResourceName]resource.Quantity + systemReservedMemory map[int]map[v1.ResourceName]resource.Quantity expectedError string }{ { @@ -161,7 +221,7 @@ func TestValidatePreReservedMemory(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - err := validateReservedMemory(tc.nodeAllocatableReservation, tc.preReservedMemory) + err := validateReservedMemory(tc.nodeAllocatableReservation, tc.systemReservedMemory) if strings.TrimSpace(tc.expectedError) != "" { assert.Error(t, err) assert.Equal(t, err.Error(), tc.expectedError) @@ -170,25 +230,24 @@ func TestValidatePreReservedMemory(t *testing.T) { } } -// validateReservedMemory func TestConvertPreReserved(t *testing.T) { - machineInfo := info.MachineInfo{ - Topology: []info.Node{ - info.Node{Id: 0}, - info.Node{Id: 1}, + machineInfo := cadvisorapi.MachineInfo{ + Topology: []cadvisorapi.Node{ + {Id: 0}, + {Id: 1}, }, } testCases := []struct { - description string - reserved map[int]map[v1.ResourceName]resource.Quantity - reservedExpected reservedMemory - expectedError string + description string + systemReserved map[int]map[v1.ResourceName]resource.Quantity + 
systemReservedExpected systemReservedMemory + expectedError string }{ { "Empty", map[int]map[v1.ResourceName]resource.Quantity{}, - reservedMemory{ + systemReservedMemory{ 0: map[v1.ResourceName]uint64{}, 1: map[v1.ResourceName]uint64{}, }, @@ -201,7 +260,7 @@ func TestConvertPreReserved(t *testing.T) { hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, }, - reservedMemory{ + systemReservedMemory{ 0: map[v1.ResourceName]uint64{ v1.ResourceMemory: 12, hugepages2M: 70, @@ -220,7 +279,7 @@ func TestConvertPreReserved(t *testing.T) { 1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI), hugepages2M: *resource.NewQuantity(7, resource.DecimalSI)}, }, - reservedMemory{ + systemReservedMemory{ 0: map[v1.ResourceName]uint64{ v1.ResourceMemory: 12, hugepages2M: 70, @@ -237,10 +296,2005 @@ func TestConvertPreReserved(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - reserved, _ := convertReserved(&machineInfo, tc.reserved) - if !reflect.DeepEqual(reserved, tc.reservedExpected) { - t.Errorf("got %v, expected %v", reserved, tc.reservedExpected) + reserved, _ := convertReserved(&machineInfo, tc.systemReserved) + if !reflect.DeepEqual(reserved, tc.systemReservedExpected) { + t.Errorf("got %v, expected %v", reserved, tc.systemReservedExpected) + } + }) + } +} + +func TestGetSystemReservedMemory(t *testing.T) { + machineInfo := returnMachineInfo() + testCases := []testMemoryManager{ + { + description: "Should return empty map when reservation is not done", + nodeAllocatableReservation: v1.ResourceList{}, + systemReservedMemory: map[int]map[v1.ResourceName]resource.Quantity{}, + expectedReserved: systemReservedMemory{ + 0: {}, + 1: {}, + }, + expectedError: nil, + machineInfo: machineInfo, + }, + { + description: "Should return error when Allocatable reservation is not equal pre reserved memory", + nodeAllocatableReservation: v1.ResourceList{}, + systemReservedMemory: map[int]map[v1.ResourceName]resource.Quantity{ + 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI)}, + }, + expectedReserved: nil, + expectedError: fmt.Errorf("the total amount of memory of type \"memory\" is not equal to the value determined by Node Allocatable feature"), + machineInfo: machineInfo, + }, + { + description: "Reserved should be equal to systemReservedMemory", + nodeAllocatableReservation: v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(2*gb, resource.BinarySI)}, + systemReservedMemory: map[int]map[v1.ResourceName]resource.Quantity{ + 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI)}, + 1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI)}, + }, + expectedReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 1 * gb, + }, + 1: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 1 * gb, + }, + }, + expectedError: nil, + machineInfo: machineInfo, + }, + } + + for _, testCase := range testCases { + t.Run(testCase.description, func(t *testing.T) { + res, err := getSystemReservedMemory(&testCase.machineInfo, testCase.nodeAllocatableReservation, testCase.systemReservedMemory) + + if !reflect.DeepEqual(res, testCase.expectedReserved) { + t.Errorf("Memory Manager getReservedMemory() error, expected reserved %+v, but got: %+v", + testCase.expectedReserved, res) + } + if !reflect.DeepEqual(err, testCase.expectedError) { + t.Errorf("Memory Manager getReservedMemory() error, 
expected error %v, but got: %v", + testCase.expectedError, err) + } + + }) + } +} + +func TestRemoveStaleState(t *testing.T) { + machineInfo := returnMachineInfo() + testCases := []testMemoryManager{ + { + description: "Should fail - policy returns an error", + policyName: policyTypeMock, + machineInfo: machineInfo, + reserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 1 * gb, + }, + 1: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 1 * gb, + }, + }, + assignments: state.ContainerMemoryAssignments{ + "fakePod1": map[string][]state.Block{ + "fakeContainer1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + "fakeContainer2": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + }, + }, + expectedAssignments: state.ContainerMemoryAssignments{ + "fakePod1": map[string][]state.Block{ + "fakeContainer1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + "fakeContainer2": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + }, + }, + machineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0}, + NumberOfAssignments: 4, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 7 * gb, + Reserved: 2 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 3 * gb, + Reserved: 2 * gb, + SystemReserved: 0 * gb, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{1}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + expectedMachineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0}, + NumberOfAssignments: 4, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 7 * gb, + Reserved: 2 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 3 * gb, + Reserved: 2 * gb, + SystemReserved: 0 * gb, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{1}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + }, + { + description: "Stale state successfully removed, without multi NUMA assignments", + policyName: policyTypeStatic, + machineInfo: machineInfo, + reserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 1 * gb, + }, + 1: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 1 * gb, + }, + }, + assignments: state.ContainerMemoryAssignments{ + "fakePod1": map[string][]state.Block{ + "fakeContainer1": { + { + NUMAAffinity: []int{0}, + Type: 
v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + "fakeContainer2": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + }, + }, + expectedAssignments: state.ContainerMemoryAssignments{}, + machineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0}, + NumberOfAssignments: 4, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 7 * gb, + Reserved: 2 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 3 * gb, + Reserved: 2 * gb, + SystemReserved: 0 * gb, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{1}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + expectedMachineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{1}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + }, + { + description: "Stale state successfully removed, with multi NUMA assignments", + policyName: policyTypeStatic, + machineInfo: machineInfo, + reserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 1 * gb, + }, + 1: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 1 * gb, + }, + }, + assignments: state.ContainerMemoryAssignments{ + "fakePod1": map[string][]state.Block{ + "fakeContainer1": { + { + NUMAAffinity: []int{0, 1}, + Type: v1.ResourceMemory, + Size: 12 * gb, + }, + { + NUMAAffinity: []int{0, 1}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + "fakeContainer2": { + { + NUMAAffinity: []int{0, 1}, + Type: v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: []int{0, 1}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + }, + }, + expectedAssignments: state.ContainerMemoryAssignments{}, + machineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0, 1}, + NumberOfAssignments: 4, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 0 * gb, + Reserved: 9 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 4 * gb, + Reserved: 1 * gb, + SystemReserved: 0 * gb, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{0, 1}, + NumberOfAssignments: 4, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 5 * gb, + Reserved: 4 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 
* gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 4 * gb, + Reserved: 1 * gb, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + expectedMachineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{1}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + }, + } + for _, testCase := range testCases { + t.Run(testCase.description, func(t *testing.T) { + mgr := &manager{ + policy: returnPolicyByName(testCase), + state: state.NewMemoryState(), + containerMap: containermap.NewContainerMap(), + containerRuntime: mockRuntimeService{ + err: nil, + }, + activePods: func() []*v1.Pod { return nil }, + podStatusProvider: mockPodStatusProvider{}, + } + mgr.sourcesReady = &sourcesReadyStub{} + mgr.state.SetMemoryAssignments(testCase.assignments) + mgr.state.SetMachineState(testCase.machineState) + + mgr.removeStaleState() + + if !areContainerMemoryAssignmentsEqual(mgr.state.GetMemoryAssignments(), testCase.expectedAssignments) { + t.Errorf("Memory Manager removeStaleState() error, expected assignments %v, but got: %v", + testCase.expectedAssignments, mgr.state.GetMemoryAssignments()) + } + if !areMachineStatesEqual(mgr.state.GetMachineState(), testCase.expectedMachineState) { + t.Fatalf("The actual machine state: %v is different from the expected one: %v", mgr.state.GetMachineState(), testCase.expectedMachineState) + } + }) + + } +} + +func TestAddContainer(t *testing.T) { + machineInfo := returnMachineInfo() + reserved := systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 1 * gb, + }, + 1: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 1 * gb, + }, + } + pod := getPod("fakePod1", "fakeContainer1", requirementsGuaranteed) + testCases := []testMemoryManager{ + { + description: "Correct allocation and adding container on NUMA 0", + policyName: policyTypeStatic, + machineInfo: machineInfo, + reserved: reserved, + machineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{1}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + expectedMachineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0}, + NumberOfAssignments: 2, + MemoryMap: 
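+ // expected state after Allocate(): the guaranteed container landed on NUMA 0, moving 1Gi of regular memory and 1Gi of hugepages from Free to Reserved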
map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 8 * gb, + Reserved: 1 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 4 * gb, + Reserved: 1 * gb, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{1}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + expectedAllocateError: nil, + expectedAddContainerError: nil, + updateError: nil, + podAllocate: pod, + assignments: state.ContainerMemoryAssignments{}, + activePods: nil, + }, + { + description: "Shouldn't return any error when policy is set as None", + updateError: nil, + policyName: policyTypeNone, + machineInfo: machineInfo, + reserved: reserved, + machineState: state.NodeMap{}, + expectedMachineState: state.NodeMap{}, + expectedAllocateError: nil, + expectedAddContainerError: nil, + podAllocate: pod, + assignments: state.ContainerMemoryAssignments{}, + activePods: nil, + }, + { + description: "Allocation should fail if policy returns an error", + updateError: nil, + policyName: policyTypeMock, + machineInfo: machineInfo, + reserved: reserved, + machineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{1}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + expectedMachineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{1}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + expectedAllocateError: fmt.Errorf("fake reg error"), + expectedAddContainerError: nil, + podAllocate: pod, + assignments: state.ContainerMemoryAssignments{}, + activePods: nil, + }, + { + description: "Adding container should fail (CRI error) but without an error", + updateError: fmt.Errorf("fake reg error"), + policyName: policyTypeStatic, + machineInfo: machineInfo, + reserved: reserved, + machineState: state.NodeMap{ + 0: 
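+ // initial state for this case; the CRI UpdateContainerResources call will fail, and the machine state is expected to be rolled back to exactly this layout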
&state.NodeState{ + Nodes: []int{0}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{1}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + expectedMachineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{1}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + expectedAllocateError: nil, + expectedAddContainerError: nil, + podAllocate: pod, + assignments: state.ContainerMemoryAssignments{}, + activePods: nil, + }, + { + description: "Correct allocation of container requiring amount of memory higher than capacity of one NUMA node", + policyName: policyTypeStatic, + machineInfo: machineInfo, + reserved: reserved, + machineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{1}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + expectedMachineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0, 1}, + NumberOfAssignments: 2, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 0 * gb, + Reserved: 9 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 1 * gb, + Reserved: 4 * gb, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{0, 1}, + NumberOfAssignments: 2, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 6 * gb, + Reserved: 3 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 
0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + expectedAllocateError: nil, + expectedAddContainerError: nil, + podAllocate: getPod("fakePod1", "fakeContainer1", &v1.ResourceRequirements{ + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("1000Mi"), + v1.ResourceMemory: resource.MustParse("12Gi"), + hugepages1Gi: resource.MustParse("4Gi"), + }, + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("1000Mi"), + v1.ResourceMemory: resource.MustParse("12Gi"), + hugepages1Gi: resource.MustParse("4Gi"), + }, + }), + assignments: state.ContainerMemoryAssignments{}, + activePods: nil, + }, + { + description: "Should fail if try to allocate container requiring amount of memory higher than capacity of one NUMA node but a small pod is already allocated", + policyName: policyTypeStatic, + machineInfo: machineInfo, + firstPod: pod, + reserved: reserved, + machineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0}, + NumberOfAssignments: 2, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 8 * gb, + Reserved: 1 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 4 * gb, + Reserved: 1 * gb, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{1}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + assignments: state.ContainerMemoryAssignments{ + "fakePod1": map[string][]state.Block{ + "fakeContainer1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + }, + }, + expectedMachineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0}, + NumberOfAssignments: 2, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 8 * gb, + Reserved: 1 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 4 * gb, + Reserved: 1 * gb, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{1}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + expectedAllocateError: fmt.Errorf("[memorymanager] failed to get the default NUMA affinity, no NUMA nodes with enough memory is available"), + expectedAddContainerError: nil, + podAllocate: getPod("fakePod2", "fakeContainer2", &v1.ResourceRequirements{ + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("1000Mi"), + v1.ResourceMemory: resource.MustParse("12Gi"), + hugepages1Gi: resource.MustParse("4Gi"), + }, + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("1000Mi"), + v1.ResourceMemory: resource.MustParse("12Gi"), + hugepages1Gi: resource.MustParse("4Gi"), + }, + }), + activePods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + UID: types.UID("fakePod1"), + }, + Spec: 
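+ // fakePod1 below keeps its guaranteed 1Gi assignment on NUMA 0, so no single NUMA node (or allowed node group) can satisfy the new 12Gi request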
v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer1", + Resources: *requirementsGuaranteed, + }, + }, + }, + }, + }, + }, + } + + for _, testCase := range testCases { + t.Run(testCase.description, func(t *testing.T) { + mgr := &manager{ + policy: returnPolicyByName(testCase), + state: state.NewMemoryState(), + containerMap: containermap.NewContainerMap(), + containerRuntime: mockRuntimeService{ + err: testCase.updateError, + }, + activePods: func() []*v1.Pod { return testCase.activePods }, + podStatusProvider: mockPodStatusProvider{}, + } + mgr.sourcesReady = &sourcesReadyStub{} + mgr.state.SetMachineState(testCase.machineState) + mgr.state.SetMemoryAssignments(testCase.assignments) + if testCase.firstPod != nil { + mgr.containerMap.Add(testCase.firstPod.Name, testCase.firstPod.Spec.Containers[0].Name, "fakeID0") + } + pod := testCase.podAllocate + container := &pod.Spec.Containers[0] + err := mgr.Allocate(pod, container) + if !reflect.DeepEqual(err, testCase.expectedAllocateError) { + t.Errorf("Memory Manager Allocate() error (%v), expected error: %v, but got: %v", + testCase.description, testCase.expectedAllocateError, err) + } + err = mgr.AddContainer(pod, container, "fakeID") + if !reflect.DeepEqual(err, testCase.expectedAddContainerError) { + t.Errorf("Memory Manager AddContainer() error (%v), expected error: %v, but got: %v", + testCase.description, testCase.expectedAddContainerError, err) + } + + if !areMachineStatesEqual(mgr.state.GetMachineState(), testCase.expectedMachineState) { + t.Errorf("[test] %+v", mgr.state.GetMemoryAssignments()) + t.Fatalf("The actual machine state: %v is different from the expected one: %v", mgr.state.GetMachineState(), testCase.expectedMachineState) + } + + }) + } +} + +func TestRemoveContainer(t *testing.T) { + machineInfo := returnMachineInfo() + reserved := systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 1 * gb, + }, + 1: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 1 * gb, + }, + } + testCases := []testMemoryManager{ + { + description: "Correct removing of a container", + removeContainerID: "fakeID2", + policyName: policyTypeStatic, + machineInfo: machineInfo, + reserved: reserved, + assignments: state.ContainerMemoryAssignments{ + "fakePod1": map[string][]state.Block{ + "fakeContainer1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + "fakeContainer2": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + }, + }, + expectedAssignments: state.ContainerMemoryAssignments{ + "fakePod1": map[string][]state.Block{ + "fakeContainer1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + }, + }, + machineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0}, + NumberOfAssignments: 4, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 7 * gb, + Reserved: 2 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 3 * gb, + Reserved: 2 * gb, + SystemReserved: 0 * gb, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{1}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + 
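+ // NUMA node 1 carries no assignments in this case, so its memory tables stay fully free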
Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + expectedMachineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0}, + NumberOfAssignments: 2, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 8 * gb, + Reserved: 1 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 4 * gb, + Reserved: 1 * gb, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{1}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + expectedError: nil, + }, + { + description: "Correct removing of a multi NUMA container", + removeContainerID: "fakeID2", + policyName: policyTypeStatic, + machineInfo: machineInfo, + reserved: reserved, + assignments: state.ContainerMemoryAssignments{ + "fakePod1": map[string][]state.Block{ + "fakeContainer1": { + { + NUMAAffinity: []int{0, 1}, + Type: v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: []int{0, 1}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + "fakeContainer2": { + { + NUMAAffinity: []int{0, 1}, + Type: v1.ResourceMemory, + Size: 12 * gb, + }, + { + NUMAAffinity: []int{0, 1}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + }, + }, + expectedAssignments: state.ContainerMemoryAssignments{ + "fakePod1": map[string][]state.Block{ + "fakeContainer1": { + { + NUMAAffinity: []int{0, 1}, + Type: v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: []int{0, 1}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + }, + }, + machineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0, 1}, + NumberOfAssignments: 4, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 0 * gb, + Reserved: 9 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 3 * gb, + Reserved: 2 * gb, + SystemReserved: 0 * gb, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{0, 1}, + NumberOfAssignments: 4, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 5 * gb, + Reserved: 4 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + expectedMachineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0, 1}, + NumberOfAssignments: 2, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 4 * gb, + Reserved: 1 * gb, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{0, 1}, + NumberOfAssignments: 2, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 8 * gb, + Reserved: 1 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + 
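+ // after fakeContainer2 is removed, only fakeContainer1's 1Gi block should stay reserved on this node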
}, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + expectedError: nil, + }, + { + description: "Should fail if policy returns an error", + removeContainerID: "fakeID1", + policyName: policyTypeMock, + machineInfo: machineInfo, + reserved: reserved, + assignments: state.ContainerMemoryAssignments{ + "fakePod1": map[string][]state.Block{ + "fakeContainer1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + "fakeContainer2": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + }, + }, + expectedAssignments: state.ContainerMemoryAssignments{ + "fakePod1": map[string][]state.Block{ + "fakeContainer1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + "fakeContainer2": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + }, + }, + machineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0}, + NumberOfAssignments: 4, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 7 * gb, + Reserved: 2 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 3 * gb, + Reserved: 2 * gb, + SystemReserved: 0 * gb, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{1}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + expectedMachineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0}, + NumberOfAssignments: 4, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 7 * gb, + Reserved: 2 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 3 * gb, + Reserved: 2 * gb, + SystemReserved: 0 * gb, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{1}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + expectedError: fmt.Errorf("fake reg error"), + }, + { + description: "Should do nothing if container is not in containerMap", + removeContainerID: "fakeID3", + policyName: policyTypeStatic, + machineInfo: machineInfo, + reserved: reserved, + assignments: state.ContainerMemoryAssignments{ + "fakePod1": map[string][]state.Block{ + "fakeContainer1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + "fakeContainer2": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: 
[]int{0}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + }, + }, + expectedAssignments: state.ContainerMemoryAssignments{ + "fakePod1": map[string][]state.Block{ + "fakeContainer1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + "fakeContainer2": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + }, + }, + machineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0}, + NumberOfAssignments: 4, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 7 * gb, + Reserved: 2 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 3 * gb, + Reserved: 2 * gb, + SystemReserved: 0 * gb, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{1}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + expectedMachineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0}, + NumberOfAssignments: 4, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 7 * gb, + Reserved: 2 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 3 * gb, + Reserved: 2 * gb, + SystemReserved: 0 * gb, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{1}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + expectedError: nil, + }, + } + for _, testCase := range testCases { + t.Run(testCase.description, func(t *testing.T) { + iniContainerMap := containermap.NewContainerMap() + iniContainerMap.Add("fakePod1", "fakeContainer1", "fakeID1") + iniContainerMap.Add("fakePod1", "fakeContainer2", "fakeID2") + mgr := &manager{ + policy: returnPolicyByName(testCase), + state: state.NewMemoryState(), + containerMap: iniContainerMap, + containerRuntime: mockRuntimeService{ + err: testCase.expectedError, + }, + activePods: func() []*v1.Pod { return nil }, + podStatusProvider: mockPodStatusProvider{}, + } + mgr.sourcesReady = &sourcesReadyStub{} + mgr.state.SetMemoryAssignments(testCase.assignments) + mgr.state.SetMachineState(testCase.machineState) + + err := mgr.RemoveContainer(testCase.removeContainerID) + if !reflect.DeepEqual(err, testCase.expectedError) { + t.Errorf("Memory Manager RemoveContainer() error (%v), expected error: %v, but got: %v", + testCase.description, testCase.expectedError, err) + } + + if !areContainerMemoryAssignmentsEqual(mgr.state.GetMemoryAssignments(), testCase.expectedAssignments) { + t.Fatalf("Memory Manager RemoveContainer() inconsistent assignment, expected: %+v, but got: %+v, start %+v", + testCase.expectedAssignments, mgr.state.GetMemoryAssignments(), testCase.expectedAssignments) + } + + if 
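+ // the resulting machine state must match the expected fixture exactly, including the untouched NUMA node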
!areMachineStatesEqual(mgr.state.GetMachineState(), testCase.expectedMachineState) { + t.Errorf("[test] %+v", mgr.state.GetMemoryAssignments()) + t.Errorf("[test] %+v, %+v", mgr.state.GetMachineState()[0].MemoryMap["memory"], mgr.state.GetMachineState()[1].MemoryMap["memory"]) + t.Fatalf("The actual machine state: %v is different from the expected one: %v", mgr.state.GetMachineState(), testCase.expectedMachineState) + } + }) + } +} + +func TestNewManager(t *testing.T) { + machineInfo := returnMachineInfo() + expectedReserved := systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 1 * gb, + }, + 1: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 1 * gb, + }, + } + testCases := []testMemoryManager{ + { + description: "Successful creation of Memory Manager instance", + policyName: policyTypeStatic, + machineInfo: machineInfo, + nodeAllocatableReservation: v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(2*gb, resource.BinarySI)}, + systemReservedMemory: map[int]map[v1.ResourceName]resource.Quantity{ + 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI)}, + 1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI)}, + }, + affinity: topologymanager.NewFakeManager(), + expectedError: nil, + expectedReserved: expectedReserved, + }, + { + description: "Should return an error when systemReservedMemory (configured with kubelet flag) does not comply with Node Allocatable feature values", + policyName: policyTypeStatic, + machineInfo: machineInfo, + nodeAllocatableReservation: v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(2*gb, resource.BinarySI)}, + systemReservedMemory: map[int]map[v1.ResourceName]resource.Quantity{ + 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI)}, + 1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(2*gb, resource.BinarySI)}, + }, + affinity: topologymanager.NewFakeManager(), + expectedError: fmt.Errorf("the total amount of memory of type %q is not equal to the value determined by Node Allocatable feature", v1.ResourceMemory), + expectedReserved: expectedReserved, + }, + { + description: "Should return an error when memory reserved for system is empty (systemReservedMemory)", + policyName: policyTypeStatic, + machineInfo: machineInfo, + nodeAllocatableReservation: v1.ResourceList{}, + systemReservedMemory: map[int]map[v1.ResourceName]resource.Quantity{}, + affinity: topologymanager.NewFakeManager(), + expectedError: fmt.Errorf("[memorymanager] you should specify the system reserved memory"), + expectedReserved: expectedReserved, + }, + { + description: "Should return an error when policy name is not correct", + policyName: "fake", + machineInfo: machineInfo, + nodeAllocatableReservation: v1.ResourceList{}, + systemReservedMemory: map[int]map[v1.ResourceName]resource.Quantity{}, + affinity: topologymanager.NewFakeManager(), + expectedError: fmt.Errorf("unknown policy: \"fake\""), + expectedReserved: expectedReserved, + }, + { + description: "Should create manager with \"none\" policy", + policyName: policyTypeNone, + machineInfo: machineInfo, + nodeAllocatableReservation: v1.ResourceList{}, + systemReservedMemory: map[int]map[v1.ResourceName]resource.Quantity{}, + affinity: topologymanager.NewFakeManager(), + expectedError: nil, + expectedReserved: expectedReserved, + }, + } + for _, testCase := range testCases { + t.Run(testCase.description, func(t *testing.T) { + stateFileDirectory, err := ioutil.TempDir("/tmp/", "memory_manager_tests") + if err 
!= nil { + t.Errorf("Cannot create state file: %s", err.Error()) + } + defer os.RemoveAll(stateFileDirectory) + + mgr, err := NewManager(string(testCase.policyName), &testCase.machineInfo, testCase.nodeAllocatableReservation, testCase.systemReservedMemory, stateFileDirectory, testCase.affinity) + + if !reflect.DeepEqual(err, testCase.expectedError) { + t.Errorf("Could not create the Memory Manager. Expected error: '%v', but got: '%v'", + testCase.expectedError, err) + } + + if testCase.expectedError == nil { + if mgr != nil { + rawMgr := mgr.(*manager) + if !reflect.DeepEqual(rawMgr.policy.Name(), string(testCase.policyName)) { + t.Errorf("Could not create the Memory Manager. Expected policy name: %v, but got: %v", + testCase.policyName, rawMgr.policy.Name()) + } + if testCase.policyName == policyTypeStatic { + if !reflect.DeepEqual(rawMgr.policy.(*staticPolicy).systemReserved, testCase.expectedReserved) { + t.Errorf("Could not create the Memory Manager. Expected system reserved: %+v, but got: %+v", + testCase.expectedReserved, rawMgr.policy.(*staticPolicy).systemReserved) + } + } + } else { + t.Errorf("Could not create the Memory Manager - manager is nil, but it should not be.") + } + + } + }) + } +} + +func TestGetTopologyHints(t *testing.T) { + testCases := []testMemoryManager{ + { + description: "Successful hint generation", + policyName: policyTypeStatic, + machineInfo: returnMachineInfo(), + reserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 1 * gb, + }, + 1: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 1 * gb, + }, + }, + assignments: state.ContainerMemoryAssignments{ + "fakePod1": map[string][]state.Block{ + "fakeContainer1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + "fakeContainer2": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 1 * gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: 1 * gb, + }, + }, + }, + }, + machineState: state.NodeMap{ + 0: &state.NodeState{ + Nodes: []int{0}, + NumberOfAssignments: 4, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 7 * gb, + Reserved: 2 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 3 * gb, + Reserved: 2 * gb, + SystemReserved: 0 * gb, + TotalMemSize: 5 * gb, + }, + }, + }, + 1: &state.NodeState{ + Nodes: []int{1}, + NumberOfAssignments: 0, + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 9 * gb, + Free: 9 * gb, + Reserved: 0 * gb, + SystemReserved: 1 * gb, + TotalMemSize: 10 * gb, + }, + hugepages1Gi: { + Allocatable: 5 * gb, + Free: 5 * gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: 5 * gb, + }, + }, + }, + }, + expectedError: nil, + expectedHints: map[string][]topologymanager.TopologyHint{ + string(v1.ResourceMemory): { + { + NUMANodeAffinity: newNUMAAffinity(0), + Preferred: true, + }, + { + NUMANodeAffinity: newNUMAAffinity(1), + Preferred: true, + }, + { + NUMANodeAffinity: newNUMAAffinity(0, 1), + Preferred: false, + }, + }, + string(hugepages1Gi): { + { + NUMANodeAffinity: newNUMAAffinity(0), + Preferred: true, + }, + { + NUMANodeAffinity: newNUMAAffinity(1), + Preferred: true, + }, + { + NUMANodeAffinity: newNUMAAffinity(0, 1), + Preferred: false, + }, + }, + }, + }, + } + + for _, testCase := range testCases { + t.Run(testCase.description, func(t *testing.T) { + mgr := 
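+ // the runtime and pod status providers are stubs here; only the policy and the pre-seeded state drive hint generation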
&manager{ + policy: returnPolicyByName(testCase), + state: state.NewMemoryState(), + containerMap: containermap.NewContainerMap(), + containerRuntime: mockRuntimeService{ + err: nil, + }, + activePods: func() []*v1.Pod { return nil }, + podStatusProvider: mockPodStatusProvider{}, + } + mgr.sourcesReady = &sourcesReadyStub{} + mgr.state.SetMachineState(testCase.machineState.Clone()) + mgr.state.SetMemoryAssignments(testCase.assignments.Clone()) + + pod := getPod("fakePod1", "fakeContainer1", requirementsGuaranteed) + container := &pod.Spec.Containers[0] + hints := mgr.GetTopologyHints(pod, container) + if !reflect.DeepEqual(hints, testCase.expectedHints) { + t.Errorf("Hints were not generated correctly. Hints generated: %+v, hints expected: %+v", + hints, testCase.expectedHints) } }) } + +} + +func returnMachineInfo() cadvisorapi.MachineInfo { + return cadvisorapi.MachineInfo{ + Topology: []cadvisorapi.Node{ + { + Id: 0, + Memory: 10 * gb, + HugePages: []cadvisorapi.HugePagesInfo{ + { + PageSize: pageSize1Gb, + NumPages: 5, + }, + }, + }, + { + Id: 1, + Memory: 10 * gb, + HugePages: []cadvisorapi.HugePagesInfo{ + { + PageSize: pageSize1Gb, + NumPages: 5, + }, + }, + }, + }, + } } diff --git a/pkg/kubelet/cm/memorymanager/policy_static_test.go b/pkg/kubelet/cm/memorymanager/policy_static_test.go new file mode 100644 index 000000000000..cc6238df3463 --- /dev/null +++ b/pkg/kubelet/cm/memorymanager/policy_static_test.go @@ -0,0 +1 @@ +package memorymanager From f7845ed4e9102785d85f1b83dbfdcfd2db03d9ed Mon Sep 17 00:00:00 2001 From: Artyom Lukianov Date: Sun, 11 Oct 2020 12:10:01 +0300 Subject: [PATCH 16/31] memory manager: provide memory manager static policy unittests Signed-off-by: Artyom Lukianov --- .../cm/memorymanager/policy_static_test.go | 2341 +++++++++++++++++ .../topologymanager/fake_topology_manager.go | 20 +- 2 files changed, 2360 insertions(+), 1 deletion(-) diff --git a/pkg/kubelet/cm/memorymanager/policy_static_test.go b/pkg/kubelet/cm/memorymanager/policy_static_test.go index cc6238df3463..49bea36a4843 100644 --- a/pkg/kubelet/cm/memorymanager/policy_static_test.go +++ b/pkg/kubelet/cm/memorymanager/policy_static_test.go @@ -1 +1,2342 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + package memorymanager + +import ( + "fmt" + "reflect" + "testing" + + cadvisorapi "github.com/google/cadvisor/info/v1" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/klog/v2" + "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" + "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" + "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" +) + +const ( + mb = 1024 * 1024 + gb = mb * 1024 + pageSize1Gb = 1048576 + hugepages1Gi = v1.ResourceName(v1.ResourceHugePagesPrefix + "1Gi") +) + +var ( + requirementsGuaranteed = &v1.ResourceRequirements{ + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("1000Mi"), + v1.ResourceMemory: resource.MustParse("1Gi"), + hugepages1Gi: resource.MustParse("1Gi"), + }, + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("1000Mi"), + v1.ResourceMemory: resource.MustParse("1Gi"), + hugepages1Gi: resource.MustParse("1Gi"), + }, + } + requirementsBurstable = &v1.ResourceRequirements{ + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("1000Mi"), + v1.ResourceMemory: resource.MustParse("2Gi"), + hugepages1Gi: resource.MustParse("2Gi"), + }, + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("1000Mi"), + v1.ResourceMemory: resource.MustParse("1Gi"), + hugepages1Gi: resource.MustParse("1Gi"), + }, + } +) + +func areMemoryBlocksEqual(mb1, mb2 []state.Block) bool { + if len(mb1) != len(mb2) { + return false + } + + copyMemoryBlocks := make([]state.Block, len(mb2)) + copy(copyMemoryBlocks, mb2) + for _, block := range mb1 { + for i, copyBlock := range copyMemoryBlocks { + if reflect.DeepEqual(block, copyBlock) { + // move the element that equals to the block to the end of the slice + copyMemoryBlocks[i] = copyMemoryBlocks[len(copyMemoryBlocks)-1] + + // remove the last element from our slice + copyMemoryBlocks = copyMemoryBlocks[:len(copyMemoryBlocks)-1] + + break + } + } + } + + return len(copyMemoryBlocks) == 0 +} + +func areContainerMemoryAssignmentsEqual(cma1, cma2 state.ContainerMemoryAssignments) bool { + if len(cma1) != len(cma2) { + return false + } + + for podUID, container := range cma1 { + if _, ok := cma2[podUID]; !ok { + klog.Errorf("[memorymanager_tests] the assignment does not have pod UID %s", podUID) + return false + } + + for containerName, memoryBlocks := range container { + if _, ok := cma2[podUID][containerName]; !ok { + klog.Errorf("[memorymanager_tests] the assignment does not have container name %s", containerName) + return false + } + + if !areMemoryBlocksEqual(memoryBlocks, cma2[podUID][containerName]) { + klog.Errorf("[memorymanager_tests] assignments memory blocks are different: %v != %v", memoryBlocks, cma2[podUID][containerName]) + return false + } + } + } + return true +} + +type testStaticPolicy struct { + description string + assignments state.ContainerMemoryAssignments + expectedAssignments state.ContainerMemoryAssignments + machineState state.NodeMap + expectedMachineState state.NodeMap + systemReserved systemReservedMemory + expectedError error + machineInfo *cadvisorapi.MachineInfo + pod *v1.Pod + topologyHint *topologymanager.TopologyHint + expectedTopologyHints map[string][]topologymanager.TopologyHint +} + +func initTests(testCase *testStaticPolicy, hint *topologymanager.TopologyHint) (Policy, state.State, error) { + manager := topologymanager.NewFakeManager() + if hint != nil { + manager = topologymanager.NewFakeManagerWithHint(hint) + } + + p, err := NewPolicyStatic(testCase.machineInfo, testCase.systemReserved, manager) + if err != 
nil { + return nil, nil, err + } + s := state.NewMemoryState() + s.SetMachineState(testCase.machineState) + s.SetMemoryAssignments(testCase.assignments) + return p, s, nil +} + +func newNUMAAffinity(bits ...int) bitmask.BitMask { + affinity, err := bitmask.NewBitMask(bits...) + if err != nil { + panic(err) + } + return affinity +} + +func TestStaticPolicyNew(t *testing.T) { + testCases := []testStaticPolicy{ + { + description: "should fail, when machine does not have reserved memory for the system workloads", + expectedError: fmt.Errorf("[memorymanager] you should specify the system reserved memory"), + }, + { + description: "should succeed, when at least one NUMA node has reserved memory", + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{}, + 1: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + }, + } + + for _, testCase := range testCases { + t.Run(testCase.description, func(t *testing.T) { + _, _, err := initTests(&testCase, nil) + if !reflect.DeepEqual(err, testCase.expectedError) { + t.Fatalf("The actual error: %v is different from the expected one: %v", err, testCase.expectedError) + } + }) + } +} + +func TestStaticPolicyName(t *testing.T) { + testCases := []testStaticPolicy{ + { + description: "should return the correct policy name", + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + }, + } + for _, testCase := range testCases { + t.Run(testCase.description, func(t *testing.T) { + p, _, err := initTests(&testCase, nil) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if p.Name() != string(policyTypeStatic) { + t.Errorf("policy name is different, expected: %q, actual: %q", p.Name(), policyTypeStatic) + } + }) + } +} + +func TestStaticPolicyStart(t *testing.T) { + testCases := []testStaticPolicy{ + { + description: "should fail, if machine state is empty, but it has memory assignments", + assignments: state.ContainerMemoryAssignments{ + "pod": map[string][]state.Block{ + "container1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 512 * mb, + }, + }, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + expectedError: fmt.Errorf("[memorymanager] machine state can not be empty when it has memory assignments"), + }, + { + description: "should fill the state with default values, when the state is empty", + expectedAssignments: state.ContainerMemoryAssignments{}, + expectedMachineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 1536 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: 3 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + NumberOfAssignments: 0, + Nodes: []int{0}, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + machineInfo: &cadvisorapi.MachineInfo{ + Topology: []cadvisorapi.Node{ + { + Id: 0, + Memory: 3 * gb, + HugePages: []cadvisorapi.HugePagesInfo{ + { + // size in KB + PageSize: pageSize1Gb, + NumPages: 1, + }, + }, + }, + }, + }, + }, + { + description: "should fail when machine state does not have all NUMA nodes", + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 1536 * mb, + 
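+ // the checkpointed state below covers only NUMA node 0, while the machine info reports two nodes, so validation is expected to fail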
Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{0}, + NumberOfAssignments: 0, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + machineInfo: &cadvisorapi.MachineInfo{ + Topology: []cadvisorapi.Node{ + { + Id: 0, + Memory: 2 * gb, + HugePages: []cadvisorapi.HugePagesInfo{ + { + // size in KB + PageSize: pageSize1Gb, + NumPages: 1, + }, + }, + }, + { + Id: 1, + Memory: 2 * gb, + HugePages: []cadvisorapi.HugePagesInfo{ + { + // size in KB + PageSize: pageSize1Gb, + NumPages: 1, + }, + }, + }, + }, + }, + expectedError: fmt.Errorf("[memorymanager] the expected machine state is different from the real one"), + }, + { + description: "should fail when machine state does not have memory resource", + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{0}, + NumberOfAssignments: 0, + }, + }, + machineInfo: &cadvisorapi.MachineInfo{ + Topology: []cadvisorapi.Node{ + { + Id: 0, + Memory: 2 * gb, + HugePages: []cadvisorapi.HugePagesInfo{ + { + // size in KB + PageSize: pageSize1Gb, + NumPages: 1, + }, + }, + }, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + expectedError: fmt.Errorf("[memorymanager] the expected machine state is different from the real one"), + }, + { + description: "should fail when machine state has wrong size of total memory", + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 1536 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: 1536 * mb, + }, + }, + Nodes: []int{0}, + NumberOfAssignments: 0, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + machineInfo: &cadvisorapi.MachineInfo{ + Topology: []cadvisorapi.Node{ + { + Id: 0, + Memory: 2 * gb, + HugePages: []cadvisorapi.HugePagesInfo{ + { + // size in KB + PageSize: pageSize1Gb, + NumPages: 1, + }, + }, + }, + }, + }, + expectedError: fmt.Errorf("[memorymanager] the expected machine state is different from the real one"), + }, + { + description: "should fail when machine state has wrong size of system reserved memory", + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 1536 * mb, + Reserved: 0, + SystemReserved: 1024, + TotalMemSize: 2 * gb, + }, + }, + Nodes: []int{0}, + NumberOfAssignments: 0, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + machineInfo: &cadvisorapi.MachineInfo{ + Topology: []cadvisorapi.Node{ + { + Id: 0, + Memory: 2 * gb, + HugePages: []cadvisorapi.HugePagesInfo{ + { + // size in KB + PageSize: pageSize1Gb, + NumPages: 1, + }, + }, + }, + }, + }, + expectedError: fmt.Errorf("[memorymanager] the expected machine state is different from the real one"), + }, + { + description: "should fail when machine state reserved memory is different from the memory of all containers memory assignments", + assignments: state.ContainerMemoryAssignments{ + 
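+ // container1 below owns a 512Mi block, but the machine state for this case still reports zero reserved memory, which should trip validation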
"pod": map[string][]state.Block{ + "container1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 512 * mb, + }, + }, + }, + }, + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 1536 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + }, + Nodes: []int{0}, + NumberOfAssignments: 1, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + machineInfo: &cadvisorapi.MachineInfo{ + Topology: []cadvisorapi.Node{ + { + Id: 0, + Memory: 2 * gb, + HugePages: []cadvisorapi.HugePagesInfo{ + { + // size in KB + PageSize: pageSize1Gb, + NumPages: 1, + }, + }, + }, + }, + }, + expectedError: fmt.Errorf("[memorymanager] the expected machine state is different from the real one"), + }, + { + description: "should fail when machine state has wrong size of hugepages", + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 1536 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{0}, + NumberOfAssignments: 0, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + machineInfo: &cadvisorapi.MachineInfo{ + Topology: []cadvisorapi.Node{ + { + Id: 0, + Memory: 2 * gb, + HugePages: []cadvisorapi.HugePagesInfo{ + { + // size in KB + PageSize: pageSize1Gb, + NumPages: 2, + }, + }, + }, + }, + }, + expectedError: fmt.Errorf("[memorymanager] the expected machine state is different from the real one"), + }, + { + description: "should fail when machine state has wrong size of system reserved hugepages", + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 1536 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: gb, + TotalMemSize: 2 * gb, + }, + }, + Nodes: []int{0}, + NumberOfAssignments: 0, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + machineInfo: &cadvisorapi.MachineInfo{ + Topology: []cadvisorapi.Node{ + { + Id: 0, + Memory: 2 * gb, + HugePages: []cadvisorapi.HugePagesInfo{ + { + // size in KB + PageSize: pageSize1Gb, + NumPages: 2, + }, + }, + }, + }, + }, + expectedError: fmt.Errorf("[memorymanager] the expected machine state is different from the real one"), + }, + { + description: "should fail when the hugepages reserved machine state is different from the hugepages of all containers memory assignments", + assignments: state.ContainerMemoryAssignments{ + "pod1": map[string][]state.Block{ + "container1": { + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: gb, + }, + }, + }, + "pod2": map[string][]state.Block{ + "container2": { + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: gb, + }, + }, + }, + }, + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 1536 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + 
hugepages1Gi: { + Allocatable: 4 * gb, + Free: gb, + Reserved: 3 * gb, + SystemReserved: 0, + TotalMemSize: 4 * gb, + }, + }, + Nodes: []int{0}, + NumberOfAssignments: 2, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + machineInfo: &cadvisorapi.MachineInfo{ + Topology: []cadvisorapi.Node{ + { + Id: 0, + Memory: 2 * gb, + HugePages: []cadvisorapi.HugePagesInfo{ + { + // size in KB + PageSize: pageSize1Gb, + NumPages: 4, + }, + }, + }, + }, + }, + expectedError: fmt.Errorf("[memorymanager] the expected machine state is different from the real one"), + }, + { + description: "should fail when machine state does not have NUMA node that used under the memory assignment", + assignments: state.ContainerMemoryAssignments{ + "pod1": map[string][]state.Block{ + "container1": { + { + NUMAAffinity: []int{1}, + Type: v1.ResourceMemory, + Size: gb, + }, + }, + }, + }, + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 1536 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{0}, + NumberOfAssignments: 0, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + machineInfo: &cadvisorapi.MachineInfo{ + Topology: []cadvisorapi.Node{ + { + Id: 0, + Memory: 2 * gb, + HugePages: []cadvisorapi.HugePagesInfo{ + { + // size in KB + PageSize: pageSize1Gb, + NumPages: 1, + }, + }, + }, + }, + }, + expectedError: fmt.Errorf("[memorymanager] (pod: pod1, container: container1) the memory assignment uses the NUMA that does not exist"), + }, + { + description: "should fail when machine state does not have resource that used under the memory assignment", + assignments: state.ContainerMemoryAssignments{ + "pod1": map[string][]state.Block{ + "container1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages2M, + Size: gb, + }, + }, + }, + }, + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 1536 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{0}, + NumberOfAssignments: 2, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + machineInfo: &cadvisorapi.MachineInfo{ + Topology: []cadvisorapi.Node{ + { + Id: 0, + Memory: 2 * gb, + HugePages: []cadvisorapi.HugePagesInfo{ + { + // size in KB + PageSize: pageSize1Gb, + NumPages: 1, + }, + }, + }, + }, + }, + expectedError: fmt.Errorf("[memorymanager] (pod: pod1, container: container1) the memory assignment uses memory resource that does not exist"), + }, + { + description: "should fail when machine state number of assignments is different from the expected one", + assignments: state.ContainerMemoryAssignments{ + "pod1": map[string][]state.Block{ + "container1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: gb, + }, + }, + }, + }, + machineState: state.NodeMap{ + 0: 
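+ // the node state below deliberately records a single assignment, although the pod above owns two memory blocks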
&state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 1536 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{0}, + NumberOfAssignments: 1, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + machineInfo: &cadvisorapi.MachineInfo{ + Topology: []cadvisorapi.Node{ + { + Id: 0, + Memory: 2 * gb, + HugePages: []cadvisorapi.HugePagesInfo{ + { + // size in KB + PageSize: pageSize1Gb, + NumPages: 1, + }, + }, + }, + }, + }, + expectedError: fmt.Errorf("[memorymanager] the expected machine state is different from the real one"), + }, + { + description: "should validate cross NUMA reserved memory vs container assignments", + assignments: state.ContainerMemoryAssignments{ + "pod1": map[string][]state.Block{ + "container1": { + { + NUMAAffinity: []int{0, 1}, + Type: v1.ResourceMemory, + Size: 768 * mb, + }, + { + NUMAAffinity: []int{0, 1}, + Type: hugepages1Gi, + Size: gb, + }, + }, + }, + "pod2": map[string][]state.Block{ + "container2": { + { + NUMAAffinity: []int{0, 1}, + Type: v1.ResourceMemory, + Size: 256 * mb, + }, + { + NUMAAffinity: []int{0, 1}, + Type: hugepages1Gi, + Size: gb, + }, + }, + }, + }, + expectedAssignments: state.ContainerMemoryAssignments{ + "pod1": map[string][]state.Block{ + "container1": { + { + NUMAAffinity: []int{0, 1}, + Type: v1.ResourceMemory, + Size: 768 * mb, + }, + { + NUMAAffinity: []int{0, 1}, + Type: hugepages1Gi, + Size: gb, + }, + }, + }, + "pod2": map[string][]state.Block{ + "container2": { + { + NUMAAffinity: []int{0, 1}, + Type: v1.ResourceMemory, + Size: 256 * mb, + }, + { + NUMAAffinity: []int{0, 1}, + Type: hugepages1Gi, + Size: gb, + }, + }, + }, + }, + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 640 * mb, + Free: 0, + Reserved: 640 * mb, + SystemReserved: 512 * mb, + TotalMemSize: 2176 * mb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: 0, + Reserved: gb, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{0, 1}, + NumberOfAssignments: 4, + }, + 1: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 640 * mb, + Free: 256 * mb, + Reserved: 384 * mb, + SystemReserved: 512 * mb, + TotalMemSize: 2176 * mb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: 0, + Reserved: gb, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{0, 1}, + NumberOfAssignments: 4, + }, + }, + expectedMachineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 640 * mb, + Free: 0, + Reserved: 640 * mb, + SystemReserved: 512 * mb, + TotalMemSize: 2176 * mb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: 0, + Reserved: gb, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{0, 1}, + NumberOfAssignments: 4, + }, + 1: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 640 * mb, + Free: 256 * mb, + Reserved: 384 * mb, + SystemReserved: 512 * mb, + TotalMemSize: 2176 * mb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: 0, + Reserved: gb, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{0, 1}, + NumberOfAssignments: 4, + }, + }, + 
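// Cross-NUMA accounting: every block above spans NUMA nodes 0 and 1, so
+ // both nodes report Nodes: []int{0, 1} and each counts all four blocks
+ // (two containers, two blocks each) in NumberOfAssignments.
+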
systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + 1: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + machineInfo: &cadvisorapi.MachineInfo{ + Topology: []cadvisorapi.Node{ + { + Id: 0, + Memory: 2176 * mb, + HugePages: []cadvisorapi.HugePagesInfo{ + { + // size in KB + PageSize: pageSize1Gb, + NumPages: 1, + }, + }, + }, + { + Id: 1, + Memory: 2176 * mb, + HugePages: []cadvisorapi.HugePagesInfo{ + { + // size in KB + PageSize: pageSize1Gb, + NumPages: 1, + }, + }, + }, + }, + }, + }, + } + + for _, testCase := range testCases { + t.Run(testCase.description, func(t *testing.T) { + klog.Infof("[Start] %s", testCase.description) + p, s, err := initTests(&testCase, nil) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + err = p.Start(s) + if !reflect.DeepEqual(err, testCase.expectedError) { + t.Fatalf("The actual error: %v is different from the expected one: %v", err, testCase.expectedError) + } + + if err != nil { + return + } + + assignments := s.GetMemoryAssignments() + if !areContainerMemoryAssignmentsEqual(assignments, testCase.expectedAssignments) { + t.Fatalf("Actual assignments: %v is different from the expected one: %v", assignments, testCase.expectedAssignments) + } + + machineState := s.GetMachineState() + if !areMachineStatesEqual(machineState, testCase.expectedMachineState) { + t.Fatalf("The actual machine state: %v is different from the expected one: %v", machineState, testCase.expectedMachineState) + } + }) + } +} + +func TestStaticPolicyAllocate(t *testing.T) { + testCases := []testStaticPolicy{ + { + description: "should do nothing for non-guaranteed pods", + expectedAssignments: state.ContainerMemoryAssignments{}, + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 1536 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{}, + }, + }, + expectedMachineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 1536 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{}, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + pod: getPod("pod1", "container1", requirementsBurstable), + expectedTopologyHints: nil, + topologyHint: &topologymanager.TopologyHint{}, + }, + { + description: "should do nothing once container already exists under the state file", + assignments: state.ContainerMemoryAssignments{ + "pod1": map[string][]state.Block{ + "container1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: gb, + }, + }, + }, + }, + expectedAssignments: state.ContainerMemoryAssignments{ + "pod1": map[string][]state.Block{ + "container1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: gb, + }, + }, + }, + }, + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 512 * mb, + Reserved: 1024 * mb, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { 
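+ // The pre-existing assignment above covers regular memory only, so the
+ // hugepages table below stays fully free and Allocate should change nothing.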
+ Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{}, + }, + }, + expectedMachineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 512 * mb, + Reserved: 1024 * mb, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{}, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + pod: getPod("pod1", "container1", requirementsGuaranteed), + expectedTopologyHints: nil, + topologyHint: &topologymanager.TopologyHint{}, + }, + { + description: "should calculate a default topology hint when no NUMA affinity was provided by the topology manager hint", + assignments: state.ContainerMemoryAssignments{}, + expectedAssignments: state.ContainerMemoryAssignments{ + "pod1": map[string][]state.Block{ + "container1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: gb, + }, + }, + }, + }, + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 1536 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{0}, + }, + }, + expectedMachineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 512 * mb, + Reserved: 1024 * mb, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: 0, + Reserved: gb, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{0}, + NumberOfAssignments: 2, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + pod: getPod("pod1", "container1", requirementsGuaranteed), + topologyHint: &topologymanager.TopologyHint{}, + }, + { + description: "should fail when no NUMA affinity was provided under the topology manager hint and calculation of the default hint failed", + assignments: state.ContainerMemoryAssignments{ + "pod1": map[string][]state.Block{ + "container1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: gb, + }, + }, + }, + }, + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 512 * mb, + Reserved: 1024 * mb, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: 0, + Reserved: gb, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{0}, + NumberOfAssignments: 2, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + pod: getPod("pod2", "container2", requirementsGuaranteed), + expectedError: fmt.Errorf("[memorymanager] failed to get the default NUMA affinity, no NUMA nodes with enough memory is available"), + topologyHint: &topologymanager.TopologyHint{}, + }, + { + description: "should fail when no NUMA affinity was provided under 
the topology manager preferred hint and default hint has preferred false", + assignments: state.ContainerMemoryAssignments{ + "pod1": map[string][]state.Block{ + "container1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 512 * mb, + }, + }, + }, + }, + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: gb, + Free: 512 * mb, + Reserved: 512 * mb, + SystemReserved: 512 * mb, + TotalMemSize: 1536 * mb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{0}, + NumberOfAssignments: 1, + }, + 1: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 512 * mb, + Free: 512 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: 1536 * mb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{1}, + NumberOfAssignments: 0, + }, + 2: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 512 * mb, + Free: 512 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: 1536 * mb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{2}, + NumberOfAssignments: 0, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + 1: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + 2: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + pod: getPod("pod2", "container2", requirementsGuaranteed), + expectedError: fmt.Errorf("[memorymanager] failed to find the default preferred hint"), + topologyHint: &topologymanager.TopologyHint{Preferred: true}, + }, + { + description: "should fail when NUMA affinity provided under the topology manager hint did not satisfy container requirements and extended hint generation failed", + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 512 * mb, + Free: 512 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{0}, + NumberOfAssignments: 0, + }, + 1: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 512 * mb, + Reserved: gb, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{1, 2}, + NumberOfAssignments: 1, + }, + 2: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 512 * mb, + Reserved: gb, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{1, 2}, + NumberOfAssignments: 1, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + 1: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + 2: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + pod: getPod("pod1", 
"container1", requirementsGuaranteed), + expectedError: fmt.Errorf("[memorymanager] failed to find NUMA nodes to extend the current topology hint"), + topologyHint: &topologymanager.TopologyHint{NUMANodeAffinity: newNUMAAffinity(0), Preferred: false}, + }, + { + description: "should fail when the topology manager provided the preferred hint and extended hint has preferred false", + assignments: state.ContainerMemoryAssignments{ + "pod1": map[string][]state.Block{ + "container1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 512 * mb, + }, + }, + }, + }, + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: gb, + Free: 512 * mb, + Reserved: 512 * mb, + SystemReserved: 512 * mb, + TotalMemSize: 1536 * mb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{0}, + NumberOfAssignments: 1, + }, + 1: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 512 * mb, + Free: 512 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: 1536 * mb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{1}, + NumberOfAssignments: 0, + }, + 2: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 512 * mb, + Free: 512 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: 1536 * mb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{2}, + NumberOfAssignments: 0, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + 1: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + 2: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + pod: getPod("pod2", "container2", requirementsGuaranteed), + expectedError: fmt.Errorf("[memorymanager] failed to find the extended preferred hint"), + topologyHint: &topologymanager.TopologyHint{NUMANodeAffinity: newNUMAAffinity(1), Preferred: true}, + }, + { + description: "should succeed to allocate memory from multiple NUMA nodes", + assignments: state.ContainerMemoryAssignments{}, + expectedAssignments: state.ContainerMemoryAssignments{ + "pod1": map[string][]state.Block{ + "container1": { + { + NUMAAffinity: []int{0, 1}, + Type: v1.ResourceMemory, + Size: gb, + }, + { + NUMAAffinity: []int{0, 1}, + Type: hugepages1Gi, + Size: gb, + }, + }, + }, + }, + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 512 * mb, + Free: 512 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{0}, + NumberOfAssignments: 0, + }, + 1: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 512 * mb, + Free: 512 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{1}, + NumberOfAssignments: 0, + }, + 2: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + 
v1.ResourceMemory: { + Allocatable: 512 * mb, + Free: 512 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{2}, + NumberOfAssignments: 0, + }, + 3: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 512 * mb, + Free: 512 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{3}, + NumberOfAssignments: 0, + }, + }, + expectedMachineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 512 * mb, + Free: 0, + Reserved: 512 * mb, + SystemReserved: 512 * mb, + TotalMemSize: gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: 0, + Reserved: gb, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{0, 1}, + NumberOfAssignments: 2, + }, + 1: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 512 * mb, + Free: 0, + Reserved: 512 * mb, + SystemReserved: 512 * mb, + TotalMemSize: gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{0, 1}, + NumberOfAssignments: 2, + }, + 2: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 512 * mb, + Free: 512 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{2}, + NumberOfAssignments: 0, + }, + 3: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 512 * mb, + Free: 512 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{3}, + NumberOfAssignments: 0, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + 1: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + 2: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + 3: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + pod: getPod("pod1", "container1", requirementsGuaranteed), + topologyHint: &topologymanager.TopologyHint{Preferred: true}, + }, + } + + for _, testCase := range testCases { + t.Run(testCase.description, func(t *testing.T) { + klog.Infof("TestStaticPolicyAllocate %s", testCase.description) + p, s, err := initTests(&testCase, testCase.topologyHint) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + err = p.Allocate(s, testCase.pod, &testCase.pod.Spec.Containers[0]) + if !reflect.DeepEqual(err, testCase.expectedError) { + t.Fatalf("The actual error %v is different from the expected one %v", err, testCase.expectedError) + } + + if err != nil { + return + } + + assignments := s.GetMemoryAssignments() + if !areContainerMemoryAssignmentsEqual(assignments, testCase.expectedAssignments) { + t.Fatalf("Actual assignments %v are different from the expected %v", assignments, testCase.expectedAssignments) + } + + machineState := s.GetMachineState() + if 
!areMachineStatesEqual(machineState, testCase.expectedMachineState) { + t.Fatalf("The actual machine state %v is different from the expected %v", machineState, testCase.expectedMachineState) + } + }) + } +} + +func TestStaticPolicyRemoveContainer(t *testing.T) { + testCases := []testStaticPolicy{ + { + description: "should do nothing when the container does not exist under the state", + expectedAssignments: state.ContainerMemoryAssignments{}, + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 1536 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{}, + }, + }, + expectedMachineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 1536 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{}, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + }, + { + description: "should delete the container assignment and update the machine state", + assignments: state.ContainerMemoryAssignments{ + "pod1": map[string][]state.Block{ + "container1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: gb, + }, + }, + }, + }, + expectedAssignments: state.ContainerMemoryAssignments{}, + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 512 * mb, + Reserved: 1024 * mb, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: 0, + Reserved: gb, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + NumberOfAssignments: 2, + Nodes: []int{0}, + }, + }, + expectedMachineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 1536 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{0}, + NumberOfAssignments: 0, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + }, + { + description: "should delete the cross NUMA container assignment and update the machine state", + assignments: state.ContainerMemoryAssignments{ + "pod1": map[string][]state.Block{ + "container1": { + { + NUMAAffinity: []int{0, 1}, + Type: v1.ResourceMemory, + Size: gb, + }, + { + NUMAAffinity: []int{0, 1}, + Type: hugepages1Gi, + Size: gb, + }, + }, + }, + }, + expectedAssignments: state.ContainerMemoryAssignments{}, + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 512 * mb, + Free: 0, + Reserved: 512 * mb, + SystemReserved: 512 * mb, + TotalMemSize: gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: 0, + Reserved: gb, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + NumberOfAssignments: 2, + Nodes: 
[]int{0, 1}, + }, + 1: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 512 * mb, + Free: 0, + Reserved: 512 * mb, + SystemReserved: 512 * mb, + TotalMemSize: gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + NumberOfAssignments: 2, + Nodes: []int{0, 1}, + }, + }, + expectedMachineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 512 * mb, + Free: 512 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + NumberOfAssignments: 0, + Nodes: []int{0}, + }, + 1: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 512 * mb, + Free: 512 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + NumberOfAssignments: 0, + Nodes: []int{1}, + }, + }, + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + 1: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + }, + } + + for _, testCase := range testCases { + t.Run(testCase.description, func(t *testing.T) { + p, s, err := initTests(&testCase, nil) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + err = p.RemoveContainer(s, "pod1", "container1") + if !reflect.DeepEqual(err, testCase.expectedError) { + t.Fatalf("The actual error %v is different from the expected one %v", err, testCase.expectedError) + } + + if err != nil { + return + } + + assignments := s.GetMemoryAssignments() + if !areContainerMemoryAssignmentsEqual(assignments, testCase.expectedAssignments) { + t.Fatalf("Actual assignments %v are different from the expected %v", assignments, testCase.expectedAssignments) + } + + machineState := s.GetMachineState() + if !areMachineStatesEqual(machineState, testCase.expectedMachineState) { + t.Fatalf("The actual machine state %v is different from the expected %v", machineState, testCase.expectedMachineState) + } + }) + } +} + +func TestStaticPolicyGetTopologyHints(t *testing.T) { + testCases := []testStaticPolicy{ + { + description: "should not provide topology hints for non-guaranteed pods", + pod: getPod("pod1", "container1", requirementsBurstable), + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + expectedTopologyHints: nil, + }, + { + description: "should provide topology hints based on the existent memory assignment", + assignments: state.ContainerMemoryAssignments{ + "pod1": map[string][]state.Block{ + "container1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: gb, + }, + }, + }, + }, + pod: getPod("pod1", "container1", requirementsGuaranteed), + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + expectedTopologyHints: map[string][]topologymanager.TopologyHint{ + string(v1.ResourceMemory): { + { + NUMANodeAffinity: newNUMAAffinity(0), + Preferred: true, + }, + }, + string(hugepages1Gi): { + { + NUMANodeAffinity: newNUMAAffinity(0), + Preferred: true, + }, + }, + }, + }, + { + description: "should calculate 
new topology hints, when the container does not exist under assignments", + assignments: state.ContainerMemoryAssignments{ + "pod1": map[string][]state.Block{ + "container1": { + { + NUMAAffinity: []int{0, 1}, + Type: v1.ResourceMemory, + Size: 2 * gb, + }, + { + NUMAAffinity: []int{0, 1}, + Type: hugepages1Gi, + Size: 2 * gb, + }, + }, + }, + }, + machineState: state.NodeMap{ + 0: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 0, + Reserved: 1536 * mb, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: 0, + Reserved: gb, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{0, 1}, + NumberOfAssignments: 2, + }, + 1: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: gb, + Reserved: 512 * mb, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: 0, + Reserved: gb, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{0, 1}, + NumberOfAssignments: 2, + }, + 2: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 1536 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{2}, + NumberOfAssignments: 0, + }, + 3: &state.NodeState{ + MemoryMap: map[v1.ResourceName]*state.MemoryTable{ + v1.ResourceMemory: { + Allocatable: 1536 * mb, + Free: 1536 * mb, + Reserved: 0, + SystemReserved: 512 * mb, + TotalMemSize: 2 * gb, + }, + hugepages1Gi: { + Allocatable: gb, + Free: gb, + Reserved: 0, + SystemReserved: 0, + TotalMemSize: gb, + }, + }, + Nodes: []int{3}, + NumberOfAssignments: 0, + }, + }, + pod: getPod("pod2", "container2", requirementsGuaranteed), + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + 1: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + 2: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + 3: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + expectedTopologyHints: map[string][]topologymanager.TopologyHint{ + string(v1.ResourceMemory): { + { + NUMANodeAffinity: newNUMAAffinity(2), + Preferred: true, + }, + { + NUMANodeAffinity: newNUMAAffinity(3), + Preferred: true, + }, + { + NUMANodeAffinity: newNUMAAffinity(2, 3), + Preferred: false, + }, + }, + string(hugepages1Gi): { + { + NUMANodeAffinity: newNUMAAffinity(2), + Preferred: true, + }, + { + NUMANodeAffinity: newNUMAAffinity(3), + Preferred: true, + }, + { + NUMANodeAffinity: newNUMAAffinity(2, 3), + Preferred: false, + }, + }, + }, + }, + { + description: "should fail when number of existing memory assignment resources are different from resources requested by container", + assignments: state.ContainerMemoryAssignments{ + "pod1": map[string][]state.Block{ + "container1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: gb, + }, + }, + }, + }, + pod: getPod("pod1", "container1", requirementsGuaranteed), + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + expectedTopologyHints: nil, + }, + { + description: "should fail when existing memory assignment resources are different from resources requested by container", + assignments: 
state.ContainerMemoryAssignments{ + "pod1": map[string][]state.Block{ + "container1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: gb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages2M, + Size: gb, + }, + }, + }, + }, + pod: getPod("pod1", "container1", requirementsGuaranteed), + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + expectedTopologyHints: nil, + }, + { + description: "should fail when existing memory assignment size is different from one requested by the container", + assignments: state.ContainerMemoryAssignments{ + "pod1": map[string][]state.Block{ + "container1": { + { + NUMAAffinity: []int{0}, + Type: v1.ResourceMemory, + Size: 512 * mb, + }, + { + NUMAAffinity: []int{0}, + Type: hugepages1Gi, + Size: gb, + }, + }, + }, + }, + pod: getPod("pod1", "container1", requirementsGuaranteed), + systemReserved: systemReservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 512 * mb, + }, + }, + expectedTopologyHints: nil, + }, + } + + for _, testCase := range testCases { + t.Run(testCase.description, func(t *testing.T) { + p, s, err := initTests(&testCase, nil) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + topologyHints := p.GetTopologyHints(s, testCase.pod, &testCase.pod.Spec.Containers[0]) + if !reflect.DeepEqual(topologyHints, testCase.expectedTopologyHints) { + t.Fatalf("The actual topology hints: '%+v' are different from the expected one: '%+v'", topologyHints, testCase.expectedTopologyHints) + } + }) + } +} diff --git a/pkg/kubelet/cm/topologymanager/fake_topology_manager.go b/pkg/kubelet/cm/topologymanager/fake_topology_manager.go index a21e50c555a8..f843f24afe6b 100644 --- a/pkg/kubelet/cm/topologymanager/fake_topology_manager.go +++ b/pkg/kubelet/cm/topologymanager/fake_topology_manager.go @@ -17,7 +17,7 @@ limitations under the License. 
package topologymanager import ( - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/util/format" @@ -56,3 +56,21 @@ func (m *fakeManager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAd Admit: true, } } + +type fakeManagerWithHint struct { + fakeManager + hint *TopologyHint +} + +// NewFakeManagerWithHint returns an instance of FakeManager with specified topology hints +func NewFakeManagerWithHint(hint *TopologyHint) Manager { + return &fakeManagerWithHint{ + fakeManager: fakeManager{}, + hint: hint, + } +} + +func (m *fakeManagerWithHint) GetAffinity(podUID string, containerName string) TopologyHint { + klog.Infof("[fake topologymanager] GetAffinity podUID: %v container name: %v", podUID, containerName) + return *m.hint +} From 24be74e7596db345aa57277370710968d23a8f43 Mon Sep 17 00:00:00 2001 From: Artyom Lukianov Date: Sun, 11 Oct 2020 12:44:22 +0300 Subject: [PATCH 17/31] memory manager: update bazel files Signed-off-by: Artyom Lukianov --- cmd/kubelet/app/BUILD | 1 + pkg/kubelet/cm/BUILD | 2 + pkg/kubelet/cm/memorymanager/BUILD | 69 +++++++++++++++++++ pkg/kubelet/cm/memorymanager/state/BUILD | 27 +++----- .../src/k8s.io/component-base/cli/flag/BUILD | 2 + 5 files changed, 83 insertions(+), 18 deletions(-) create mode 100644 pkg/kubelet/cm/memorymanager/BUILD diff --git a/cmd/kubelet/app/BUILD b/cmd/kubelet/app/BUILD index bfd511422e25..89826ffa84cc 100644 --- a/cmd/kubelet/app/BUILD +++ b/cmd/kubelet/app/BUILD @@ -18,6 +18,7 @@ go_library( "//cmd/kubelet/app/options:go_default_library", "//pkg/api/legacyscheme:go_default_library", "//pkg/apis/core:go_default_library", + "//pkg/apis/core/v1/helper:go_default_library", "//pkg/capabilities:go_default_library", "//pkg/cloudprovider/providers:go_default_library", "//pkg/credentialprovider:go_default_library", diff --git a/pkg/kubelet/cm/BUILD b/pkg/kubelet/cm/BUILD index 1601d670685b..8c6a70c81077 100644 --- a/pkg/kubelet/cm/BUILD +++ b/pkg/kubelet/cm/BUILD @@ -33,6 +33,7 @@ go_library( "//pkg/features:go_default_library", "//pkg/kubelet/cm/cpumanager:go_default_library", "//pkg/kubelet/cm/cpuset:go_default_library", + "//pkg/kubelet/cm/memorymanager:go_default_library", "//pkg/kubelet/cm/topologymanager:go_default_library", "//pkg/kubelet/config:go_default_library", "//pkg/kubelet/container:go_default_library", @@ -240,6 +241,7 @@ filegroup( "//pkg/kubelet/cm/cpumanager:all-srcs", "//pkg/kubelet/cm/cpuset:all-srcs", "//pkg/kubelet/cm/devicemanager:all-srcs", + "//pkg/kubelet/cm/memorymanager:all-srcs", "//pkg/kubelet/cm/topologymanager:all-srcs", "//pkg/kubelet/cm/util:all-srcs", ], diff --git a/pkg/kubelet/cm/memorymanager/BUILD b/pkg/kubelet/cm/memorymanager/BUILD new file mode 100644 index 000000000000..2ff5be7a9a21 --- /dev/null +++ b/pkg/kubelet/cm/memorymanager/BUILD @@ -0,0 +1,69 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "go_default_library", + srcs = [ + "fake_memory_manager.go", + "memory_manager.go", + "policy.go", + "policy_none.go", + "policy_static.go", + ], + importpath = "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager", + visibility = ["//visibility:public"], + deps = [ + "//pkg/apis/core/v1/helper:go_default_library", + "//pkg/apis/core/v1/helper/qos:go_default_library", + "//pkg/kubelet/cm/containermap:go_default_library", + "//pkg/kubelet/cm/memorymanager/state:go_default_library", + "//pkg/kubelet/cm/topologymanager:go_default_library", + 
"//pkg/kubelet/cm/topologymanager/bitmask:go_default_library", + "//pkg/kubelet/config:go_default_library", + "//pkg/kubelet/status:go_default_library", + "//staging/src/k8s.io/api/core/v1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", + "//staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2:go_default_library", + "//vendor/github.com/google/cadvisor/info/v1:go_default_library", + "//vendor/k8s.io/klog/v2:go_default_library", + ], +) + +go_test( + name = "go_default_test", + srcs = [ + "memory_manager_test.go", + "policy_static_test.go", + ], + embed = [":go_default_library"], + deps = [ + "//pkg/kubelet/cm/containermap:go_default_library", + "//pkg/kubelet/cm/memorymanager/state:go_default_library", + "//pkg/kubelet/cm/topologymanager:go_default_library", + "//pkg/kubelet/cm/topologymanager/bitmask:go_default_library", + "//staging/src/k8s.io/api/core/v1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/types:go_default_library", + "//staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2:go_default_library", + "//vendor/github.com/google/cadvisor/info/v1:go_default_library", + "//vendor/github.com/stretchr/testify/assert:go_default_library", + "//vendor/k8s.io/klog/v2:go_default_library", + ], +) + +filegroup( + name = "package-srcs", + srcs = glob(["**"]), + tags = ["automanaged"], + visibility = ["//visibility:private"], +) + +filegroup( + name = "all-srcs", + srcs = [ + ":package-srcs", + "//pkg/kubelet/cm/memorymanager/state:all-srcs", + ], + tags = ["automanaged"], + visibility = ["//visibility:public"], +) diff --git a/pkg/kubelet/cm/memorymanager/state/BUILD b/pkg/kubelet/cm/memorymanager/state/BUILD index 438737c9d181..a0fcfa9d2d61 100644 --- a/pkg/kubelet/cm/memorymanager/state/BUILD +++ b/pkg/kubelet/cm/memorymanager/state/BUILD @@ -6,36 +6,30 @@ go_library( "checkpoint.go", "state.go", "state_checkpoint.go", - "state_file.go", "state_mem.go", ], - importpath = "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state", + importpath = "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state", visibility = ["//visibility:public"], deps = [ "//pkg/kubelet/checkpointmanager:go_default_library", "//pkg/kubelet/checkpointmanager/checksum:go_default_library", "//pkg/kubelet/checkpointmanager/errors:go_default_library", - "//pkg/kubelet/cm/cpumanager/containermap:go_default_library", - "//pkg/kubelet/cm/cpuset:go_default_library", - "//vendor/github.com/davecgh/go-spew/spew:go_default_library", - "//vendor/k8s.io/klog:go_default_library", + "//pkg/kubelet/cm/containermap:go_default_library", + "//staging/src/k8s.io/api/core/v1:go_default_library", + "//vendor/k8s.io/klog/v2:go_default_library", ], ) go_test( name = "go_default_test", - srcs = [ - "state_checkpoint_test.go", - "state_compatibility_test.go", - "state_file_test.go", - ], + srcs = ["state_checkpoint_test.go"], embed = [":go_default_library"], deps = [ "//pkg/kubelet/checkpointmanager:go_default_library", - "//pkg/kubelet/cm/cpumanager/containermap:go_default_library", + "//pkg/kubelet/cm/containermap:go_default_library", "//pkg/kubelet/cm/cpumanager/state/testing:go_default_library", - "//pkg/kubelet/cm/cpuset:go_default_library", - "//vendor/github.com/stretchr/testify/require:go_default_library", + "//staging/src/k8s.io/api/core/v1:go_default_library", + "//vendor/github.com/stretchr/testify/assert:go_default_library", ], ) 
@@ -48,10 +42,7 @@ filegroup( filegroup( name = "all-srcs", - srcs = [ - ":package-srcs", - "//pkg/kubelet/cm/cpumanager/state/testing:all-srcs", - ], + srcs = [":package-srcs"], tags = ["automanaged"], visibility = ["//visibility:public"], ) diff --git a/staging/src/k8s.io/component-base/cli/flag/BUILD b/staging/src/k8s.io/component-base/cli/flag/BUILD index 9c52db989892..25d290f7e85e 100644 --- a/staging/src/k8s.io/component-base/cli/flag/BUILD +++ b/staging/src/k8s.io/component-base/cli/flag/BUILD @@ -9,6 +9,7 @@ load( go_test( name = "go_default_test", srcs = [ + "bracket_separated_slice_map_string_string_test.go", "ciphersuites_flag_test.go", "colon_separated_multimap_string_string_test.go", "langle_separated_map_string_string_test.go", @@ -24,6 +25,7 @@ go_test( go_library( name = "go_default_library", srcs = [ + "bracket_separated_slice_map_string_string.go", "ciphersuites_flag.go", "ciphersuites_flag_114.go", "colon_separated_multimap_string_string.go", From 27c5efe8ec596a30b73bc0ee10fef419e6aeca31 Mon Sep 17 00:00:00 2001 From: Artyom Lukianov Date: Mon, 2 Nov 2020 21:41:33 +0200 Subject: [PATCH 18/31] memory manager: fix scheme unit test Signed-off-by: Artyom Lukianov --- pkg/kubelet/apis/config/fuzzer/fuzzer.go | 1 + pkg/kubelet/apis/config/helpers_test.go | 2 ++ .../scheme/testdata/KubeletConfiguration/after/v1beta1.yaml | 1 + .../KubeletConfiguration/roundtrip/default/v1beta1.yaml | 1 + 4 files changed, 5 insertions(+) diff --git a/pkg/kubelet/apis/config/fuzzer/fuzzer.go b/pkg/kubelet/apis/config/fuzzer/fuzzer.go index 104d8215526e..b5192d333de4 100644 --- a/pkg/kubelet/apis/config/fuzzer/fuzzer.go +++ b/pkg/kubelet/apis/config/fuzzer/fuzzer.go @@ -62,6 +62,7 @@ func Funcs(codecs runtimeserializer.CodecFactory) []interface{} { obj.KernelMemcgNotification = false obj.MaxOpenFiles = 1000000 obj.MaxPods = 110 + obj.MemoryManagerPolicy = v1beta1.NoneMemoryManagerPolicy obj.PodPidsLimit = -1 obj.NodeStatusUpdateFrequency = metav1.Duration{Duration: 10 * time.Second} obj.NodeStatusReportFrequency = metav1.Duration{Duration: time.Minute} diff --git a/pkg/kubelet/apis/config/helpers_test.go b/pkg/kubelet/apis/config/helpers_test.go index 8e769634156a..f0787e65889a 100644 --- a/pkg/kubelet/apis/config/helpers_test.go +++ b/pkg/kubelet/apis/config/helpers_test.go @@ -206,6 +206,7 @@ var ( "StaticPodURLHeader[*][*]", "MaxOpenFiles", "MaxPods", + "MemoryManagerPolicy", "NodeLeaseDurationSeconds", "NodeStatusMaxImages", "NodeStatusUpdateFrequency.Duration", @@ -220,6 +221,7 @@ var ( "ReadOnlyPort", "RegistryBurst", "RegistryPullQPS", + "ReservedMemory[*][*]", "ReservedSystemCPUs", "RuntimeRequestTimeout.Duration", "RunOnce", diff --git a/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/after/v1beta1.yaml b/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/after/v1beta1.yaml index 364c43416aec..a945c692e5e2 100644 --- a/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/after/v1beta1.yaml +++ b/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/after/v1beta1.yaml @@ -55,6 +55,7 @@ logging: makeIPTablesUtilChains: true maxOpenFiles: 1000000 maxPods: 110 +memoryManagerPolicy: none nodeLeaseDurationSeconds: 40 nodeStatusMaxImages: 50 nodeStatusReportFrequency: 5m0s diff --git a/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/roundtrip/default/v1beta1.yaml b/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/roundtrip/default/v1beta1.yaml index 364c43416aec..a945c692e5e2 100644 --- 
a/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/roundtrip/default/v1beta1.yaml +++ b/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/roundtrip/default/v1beta1.yaml @@ -55,6 +55,7 @@ logging: makeIPTablesUtilChains: true maxOpenFiles: 1000000 maxPods: 110 +memoryManagerPolicy: none nodeLeaseDurationSeconds: 40 nodeStatusMaxImages: 50 nodeStatusReportFrequency: 5m0s From aa63e5aed210c6ea3c9a13aa66649cb842bf8dc0 Mon Sep 17 00:00:00 2001 From: Artyom Lukianov Date: Wed, 4 Nov 2020 13:53:32 +0200 Subject: [PATCH 19/31] memory manager: provide an additional validation for reserved memory Calculate the total amount of reserved memory only for NUMA nodes that are existing under the machine. Signed-off-by: Artyom Lukianov --- .../cm/memorymanager/memory_manager.go | 21 ++++++--- .../cm/memorymanager/memory_manager_test.go | 45 ++++++++++++++----- 2 files changed, 51 insertions(+), 15 deletions(-) diff --git a/pkg/kubelet/cm/memorymanager/memory_manager.go b/pkg/kubelet/cm/memorymanager/memory_manager.go index 3f906ac68f99..058f3ad922e4 100644 --- a/pkg/kubelet/cm/memorymanager/memory_manager.go +++ b/pkg/kubelet/cm/memorymanager/memory_manager.go @@ -267,6 +267,7 @@ func (m *manager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[str return m.policy.GetTopologyHints(m.state, pod, container) } +// TODO: move the method to the upper level, to re-use it under the CPU and memory managers func (m *manager) removeStaleState() { // Only once all sources are ready do we attempt to remove any stale state. // This ensures that the call to `m.activePods()` below will succeed with @@ -319,10 +320,20 @@ func (m *manager) policyRemoveContainerByRef(podUID string, containerName string return err } -func getTotalMemoryTypeReserved(preReservedMemory map[int]map[v1.ResourceName]resource.Quantity) map[v1.ResourceName]resource.Quantity { +func getTotalMemoryTypeReserved(machineInfo *cadvisorapi.MachineInfo, reservedMemory map[int]map[v1.ResourceName]resource.Quantity) map[v1.ResourceName]resource.Quantity { totalMemoryType := map[v1.ResourceName]resource.Quantity{} - for _, node := range preReservedMemory { + numaNodes := map[int]bool{} + for _, numaNode := range machineInfo.Topology { + numaNodes[numaNode.Id] = true + } + + for nodeID, node := range reservedMemory { + if !numaNodes[nodeID] { + klog.Warningf("The NUMA node %d specified under --reserved- memory does not exist on the machine", nodeID) + continue + } + for memType, memVal := range node { if totalMem, exists := totalMemoryType[memType]; exists { memVal.Add(totalMem) @@ -334,8 +345,8 @@ func getTotalMemoryTypeReserved(preReservedMemory map[int]map[v1.ResourceName]re return totalMemoryType } -func validateReservedMemory(nodeAllocatableReservation v1.ResourceList, reservedMemory map[int]map[v1.ResourceName]resource.Quantity) error { - totalMemoryType := getTotalMemoryTypeReserved(reservedMemory) +func validateReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, reservedMemory map[int]map[v1.ResourceName]resource.Quantity) error { + totalMemoryType := getTotalMemoryTypeReserved(machineInfo, reservedMemory) commonMemoryTypeSet := make(map[v1.ResourceName]bool) for resourceType := range totalMemoryType { @@ -391,7 +402,7 @@ func convertReserved(machineInfo *cadvisorapi.MachineInfo, reservedMemory map[in } func getSystemReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, preReservedMemory map[int]map[v1.ResourceName]resource.Quantity) 
(systemReservedMemory, error) { - if err := validateReservedMemory(nodeAllocatableReservation, preReservedMemory); err != nil { + if err := validateReservedMemory(machineInfo, nodeAllocatableReservation, preReservedMemory); err != nil { return nil, err } diff --git a/pkg/kubelet/cm/memorymanager/memory_manager_test.go b/pkg/kubelet/cm/memorymanager/memory_manager_test.go index 40abd34447ed..edbaa1fed8f4 100644 --- a/pkg/kubelet/cm/memorymanager/memory_manager_test.go +++ b/pkg/kubelet/cm/memorymanager/memory_manager_test.go @@ -146,54 +146,77 @@ func getPod(podUID string, containerName string, requirements *v1.ResourceRequir } func TestValidateReservedMemory(t *testing.T) { + machineInfo := &cadvisorapi.MachineInfo{ + Topology: []cadvisorapi.Node{ + {Id: 0}, + {Id: 1}, + }, + } const msgNotEqual = "the total amount of memory of type \"%s\" is not equal to the value determined by Node Allocatable feature" testCases := []struct { description string nodeAllocatableReservation v1.ResourceList + machineInfo *cadvisorapi.MachineInfo systemReservedMemory map[int]map[v1.ResourceName]resource.Quantity expectedError string }{ { - "Node Allocatable not set, pre-reserved not set", + "Node Allocatable not set, reserved not set", v1.ResourceList{}, + machineInfo, map[int]map[v1.ResourceName]resource.Quantity{}, "", }, { - "Node Allocatable set to zero, pre-reserved set to zero", + "Node Allocatable set to zero, reserved set to zero", v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(0, resource.DecimalSI)}, + machineInfo, map[int]map[v1.ResourceName]resource.Quantity{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(0, resource.DecimalSI)}, }, "", }, { - "Node Allocatable not set (equal zero), pre-reserved set", + "Node Allocatable not set (equal zero), reserved set", v1.ResourceList{}, + machineInfo, map[int]map[v1.ResourceName]resource.Quantity{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI)}, }, fmt.Sprintf(msgNotEqual, v1.ResourceMemory), }, { - "Node Allocatable set, pre-reserved not set", + "Node Allocatable set, reserved not set", v1.ResourceList{hugepages2M: *resource.NewQuantity(5, resource.DecimalSI)}, + machineInfo, map[int]map[v1.ResourceName]resource.Quantity{}, fmt.Sprintf(msgNotEqual, hugepages2M), }, { - "Pre-reserved not equal to Node Allocatable", + "Reserved not equal to Node Allocatable", v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI)}, + machineInfo, + map[int]map[v1.ResourceName]resource.Quantity{ + 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI)}, + }, + fmt.Sprintf(msgNotEqual, v1.ResourceMemory), + }, + { + "Reserved contains the NUMA node that does not exist under the machine", + v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(17, resource.DecimalSI)}, + machineInfo, map[int]map[v1.ResourceName]resource.Quantity{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI)}, + 2: nodeResources{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI)}, }, fmt.Sprintf(msgNotEqual, v1.ResourceMemory), }, { - "Pre-reserved total equal to Node Allocatable", + "Reserved total equal to Node Allocatable", v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(17, resource.DecimalSI), hugepages2M: *resource.NewQuantity(77, resource.DecimalSI), hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, + machineInfo, map[int]map[v1.ResourceName]resource.Quantity{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, 
resource.DecimalSI), hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), @@ -204,10 +227,11 @@ func TestValidateReservedMemory(t *testing.T) { "", }, { - "Pre-reserved total hugapages-2M not equal to Node Allocatable", + "Reserved total hugepages-2M not equal to Node Allocatable", v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(17, resource.DecimalSI), hugepages2M: *resource.NewQuantity(14, resource.DecimalSI), hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, + machineInfo, map[int]map[v1.ResourceName]resource.Quantity{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), @@ -215,13 +239,14 @@ func TestValidateReservedMemory(t *testing.T) { 1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI), hugepages2M: *resource.NewQuantity(7, resource.DecimalSI)}, }, + fmt.Sprintf(msgNotEqual, hugepages2M), }, } for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - err := validateReservedMemory(tc.nodeAllocatableReservation, tc.systemReservedMemory) + err := validateReservedMemory(tc.machineInfo, tc.nodeAllocatableReservation, tc.systemReservedMemory) if strings.TrimSpace(tc.expectedError) != "" { assert.Error(t, err) assert.Equal(t, err.Error(), tc.expectedError) @@ -254,7 +279,7 @@ func TestConvertPreReserved(t *testing.T) { "", }, { - "Single NUMA node is pre-reserved", + "Single NUMA node is reserved", map[int]map[v1.ResourceName]resource.Quantity{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), @@ -271,7 +296,7 @@ func TestConvertPreReserved(t *testing.T) { "", }, { - "Both NUMA nodes are pre-reserved", + "Both NUMA nodes are reserved", map[int]map[v1.ResourceName]resource.Quantity{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), From a015e4163fdc01a29f847d71b74d234ffe62cf27 Mon Sep 17 00:00:00 2001 From: Artyom Lukianov Date: Wed, 4 Nov 2020 14:28:04 +0200 Subject: [PATCH 20/31] memory manager: rename state structs and fields This commit renames the state structs and some of their fields.
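As a rough sketch (illustrative only; the type, field, and constant names are taken from the test fixtures in this series), the renamed types nest like this:

    machineState := state.NUMANodeMap{
        0: &state.NUMANodeState{
            // Cells groups the NUMA nodes that jointly back the
            // allocations on this node; a node that serves its
            // allocations alone lists only itself.
            Cells:               []int{0},
            NumberOfAssignments: 2,
            MemoryMap: map[v1.ResourceName]*state.MemoryTable{
                v1.ResourceMemory: {
                    Allocatable:    1536 * mb,
                    Free:           512 * mb,
                    Reserved:       1024 * mb,
                    SystemReserved: 512 * mb,
                    TotalMemSize:   2 * gb,
                },
            },
        },
    }

The full set of renames: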
- NodeMap -> NUMANodeMap - NodeState -> NUMANodeState - NUMANodeState.Nodes -> NUMANodesState.Cells Signed-off-by: Artyom Lukianov --- .../cm/memorymanager/memory_manager_test.go | 258 ++++++++-------- pkg/kubelet/cm/memorymanager/policy_static.go | 34 +-- .../cm/memorymanager/policy_static_test.go | 278 +++++++++--------- .../cm/memorymanager/state/checkpoint.go | 4 +- pkg/kubelet/cm/memorymanager/state/state.go | 28 +- .../memorymanager/state/state_checkpoint.go | 6 +- .../state/state_checkpoint_test.go | 42 +-- .../cm/memorymanager/state/state_mem.go | 12 +- 8 files changed, 331 insertions(+), 331 deletions(-) diff --git a/pkg/kubelet/cm/memorymanager/memory_manager_test.go b/pkg/kubelet/cm/memorymanager/memory_manager_test.go index edbaa1fed8f4..c503812046d1 100644 --- a/pkg/kubelet/cm/memorymanager/memory_manager_test.go +++ b/pkg/kubelet/cm/memorymanager/memory_manager_test.go @@ -48,8 +48,8 @@ type testMemoryManager struct { machineInfo cadvisorapi.MachineInfo assignments state.ContainerMemoryAssignments expectedAssignments state.ContainerMemoryAssignments - machineState state.NodeMap - expectedMachineState state.NodeMap + machineState state.NUMANodeMap + expectedMachineState state.NUMANodeMap expectedError error expectedAllocateError error expectedAddContainerError error @@ -461,9 +461,9 @@ func TestRemoveStaleState(t *testing.T) { }, }, }, - machineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0}, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0}, NumberOfAssignments: 4, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -482,8 +482,8 @@ func TestRemoveStaleState(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{1}, + 1: &state.NUMANodeState{ + Cells: []int{1}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -503,9 +503,9 @@ func TestRemoveStaleState(t *testing.T) { }, }, }, - expectedMachineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0}, + expectedMachineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0}, NumberOfAssignments: 4, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -524,8 +524,8 @@ func TestRemoveStaleState(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{1}, + 1: &state.NUMANodeState{ + Cells: []int{1}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -587,9 +587,9 @@ func TestRemoveStaleState(t *testing.T) { }, }, expectedAssignments: state.ContainerMemoryAssignments{}, - machineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0}, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0}, NumberOfAssignments: 4, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -608,8 +608,8 @@ func TestRemoveStaleState(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{1}, + 1: &state.NUMANodeState{ + Cells: []int{1}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -629,9 +629,9 @@ func TestRemoveStaleState(t *testing.T) { }, }, }, - expectedMachineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0}, + expectedMachineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -650,8 +650,8 @@ func TestRemoveStaleState(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{1}, + 1: 
&state.NUMANodeState{ + Cells: []int{1}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -713,9 +713,9 @@ func TestRemoveStaleState(t *testing.T) { }, }, expectedAssignments: state.ContainerMemoryAssignments{}, - machineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0, 1}, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0, 1}, NumberOfAssignments: 4, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -734,8 +734,8 @@ func TestRemoveStaleState(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{0, 1}, + 1: &state.NUMANodeState{ + Cells: []int{0, 1}, NumberOfAssignments: 4, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -755,9 +755,9 @@ func TestRemoveStaleState(t *testing.T) { }, }, }, - expectedMachineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0}, + expectedMachineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -776,8 +776,8 @@ func TestRemoveStaleState(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{1}, + 1: &state.NUMANodeState{ + Cells: []int{1}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -846,9 +846,9 @@ func TestAddContainer(t *testing.T) { policyName: policyTypeStatic, machineInfo: machineInfo, reserved: reserved, - machineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0}, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -867,8 +867,8 @@ func TestAddContainer(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{1}, + 1: &state.NUMANodeState{ + Cells: []int{1}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -888,9 +888,9 @@ func TestAddContainer(t *testing.T) { }, }, }, - expectedMachineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0}, + expectedMachineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0}, NumberOfAssignments: 2, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -909,8 +909,8 @@ func TestAddContainer(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{1}, + 1: &state.NUMANodeState{ + Cells: []int{1}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -943,8 +943,8 @@ func TestAddContainer(t *testing.T) { policyName: policyTypeNone, machineInfo: machineInfo, reserved: reserved, - machineState: state.NodeMap{}, - expectedMachineState: state.NodeMap{}, + machineState: state.NUMANodeMap{}, + expectedMachineState: state.NUMANodeMap{}, expectedAllocateError: nil, expectedAddContainerError: nil, podAllocate: pod, @@ -957,9 +957,9 @@ func TestAddContainer(t *testing.T) { policyName: policyTypeMock, machineInfo: machineInfo, reserved: reserved, - machineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0}, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -978,8 +978,8 @@ func TestAddContainer(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{1}, + 1: &state.NUMANodeState{ + Cells: []int{1}, NumberOfAssignments: 0, MemoryMap: 
map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -999,9 +999,9 @@ func TestAddContainer(t *testing.T) { }, }, }, - expectedMachineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0}, + expectedMachineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1020,8 +1020,8 @@ func TestAddContainer(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{1}, + 1: &state.NUMANodeState{ + Cells: []int{1}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1053,9 +1053,9 @@ func TestAddContainer(t *testing.T) { policyName: policyTypeStatic, machineInfo: machineInfo, reserved: reserved, - machineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0}, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1074,8 +1074,8 @@ func TestAddContainer(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{1}, + 1: &state.NUMANodeState{ + Cells: []int{1}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1095,9 +1095,9 @@ func TestAddContainer(t *testing.T) { }, }, }, - expectedMachineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0}, + expectedMachineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1116,8 +1116,8 @@ func TestAddContainer(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{1}, + 1: &state.NUMANodeState{ + Cells: []int{1}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1148,9 +1148,9 @@ func TestAddContainer(t *testing.T) { policyName: policyTypeStatic, machineInfo: machineInfo, reserved: reserved, - machineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0}, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1169,8 +1169,8 @@ func TestAddContainer(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{1}, + 1: &state.NUMANodeState{ + Cells: []int{1}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1190,9 +1190,9 @@ func TestAddContainer(t *testing.T) { }, }, }, - expectedMachineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0, 1}, + expectedMachineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0, 1}, NumberOfAssignments: 2, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1211,8 +1211,8 @@ func TestAddContainer(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{0, 1}, + 1: &state.NUMANodeState{ + Cells: []int{0, 1}, NumberOfAssignments: 2, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1255,9 +1255,9 @@ func TestAddContainer(t *testing.T) { machineInfo: machineInfo, firstPod: pod, reserved: reserved, - machineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0}, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0}, NumberOfAssignments: 2, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1276,8 +1276,8 @@ func TestAddContainer(t *testing.T) { }, }, }, - 
1: &state.NodeState{ - Nodes: []int{1}, + 1: &state.NUMANodeState{ + Cells: []int{1}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1313,9 +1313,9 @@ func TestAddContainer(t *testing.T) { }, }, }, - expectedMachineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0}, + expectedMachineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0}, NumberOfAssignments: 2, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1334,8 +1334,8 @@ func TestAddContainer(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{1}, + 1: &state.NUMANodeState{ + Cells: []int{1}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1488,9 +1488,9 @@ func TestRemoveContainer(t *testing.T) { }, }, }, - machineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0}, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0}, NumberOfAssignments: 4, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1509,8 +1509,8 @@ func TestRemoveContainer(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{1}, + 1: &state.NUMANodeState{ + Cells: []int{1}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1530,9 +1530,9 @@ func TestRemoveContainer(t *testing.T) { }, }, }, - expectedMachineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0}, + expectedMachineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0}, NumberOfAssignments: 2, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1551,8 +1551,8 @@ func TestRemoveContainer(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{1}, + 1: &state.NUMANodeState{ + Cells: []int{1}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1624,9 +1624,9 @@ func TestRemoveContainer(t *testing.T) { }, }, }, - machineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0, 1}, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0, 1}, NumberOfAssignments: 4, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1645,8 +1645,8 @@ func TestRemoveContainer(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{0, 1}, + 1: &state.NUMANodeState{ + Cells: []int{0, 1}, NumberOfAssignments: 4, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1666,9 +1666,9 @@ func TestRemoveContainer(t *testing.T) { }, }, }, - expectedMachineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0, 1}, + expectedMachineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0, 1}, NumberOfAssignments: 2, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1687,8 +1687,8 @@ func TestRemoveContainer(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{0, 1}, + 1: &state.NUMANodeState{ + Cells: []int{0, 1}, NumberOfAssignments: 2, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1772,9 +1772,9 @@ func TestRemoveContainer(t *testing.T) { }, }, }, - machineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0}, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0}, NumberOfAssignments: 4, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1793,8 +1793,8 @@ func TestRemoveContainer(t *testing.T) { }, }, }, - 1: &state.NodeState{ 
- Nodes: []int{1}, + 1: &state.NUMANodeState{ + Cells: []int{1}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1814,9 +1814,9 @@ func TestRemoveContainer(t *testing.T) { }, }, }, - expectedMachineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0}, + expectedMachineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0}, NumberOfAssignments: 4, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1835,8 +1835,8 @@ func TestRemoveContainer(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{1}, + 1: &state.NUMANodeState{ + Cells: []int{1}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1920,9 +1920,9 @@ func TestRemoveContainer(t *testing.T) { }, }, }, - machineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0}, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0}, NumberOfAssignments: 4, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1941,8 +1941,8 @@ func TestRemoveContainer(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{1}, + 1: &state.NUMANodeState{ + Cells: []int{1}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1962,9 +1962,9 @@ func TestRemoveContainer(t *testing.T) { }, }, }, - expectedMachineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0}, + expectedMachineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0}, NumberOfAssignments: 4, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -1983,8 +1983,8 @@ func TestRemoveContainer(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{1}, + 1: &state.NUMANodeState{ + Cells: []int{1}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -2193,9 +2193,9 @@ func TestGetTopologyHints(t *testing.T) { }, }, }, - machineState: state.NodeMap{ - 0: &state.NodeState{ - Nodes: []int{0}, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + Cells: []int{0}, NumberOfAssignments: 4, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { @@ -2214,8 +2214,8 @@ func TestGetTopologyHints(t *testing.T) { }, }, }, - 1: &state.NodeState{ - Nodes: []int{1}, + 1: &state.NUMANodeState{ + Cells: []int{1}, NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { diff --git a/pkg/kubelet/cm/memorymanager/policy_static.go b/pkg/kubelet/cm/memorymanager/policy_static.go index c26b4efb2652..2da2ecd4c6ba 100644 --- a/pkg/kubelet/cm/memorymanager/policy_static.go +++ b/pkg/kubelet/cm/memorymanager/policy_static.go @@ -37,7 +37,7 @@ const policyTypeStatic policyType = "static" type systemReservedMemory map[int]map[v1.ResourceName]uint64 -// SingleNUMAPolicy is implementation of the policy interface for the single NUMA policy +// staticPolicy is implementation of the policy interface for the static policy type staticPolicy struct { // machineInfo contains machine memory related information machineInfo *cadvisorapi.MachineInfo @@ -49,7 +49,7 @@ type staticPolicy struct { var _ Policy = &staticPolicy{} -// NewPolicyStatic returns new single NUMA policy instance +// NewPolicyStatic returns new static policy instance func NewPolicyStatic(machineInfo *cadvisorapi.MachineInfo, reserved systemReservedMemory, affinity topologymanager.Store) (Policy, error) { var totalSystemReserved uint64 for _, node := 
range reserved { @@ -149,7 +149,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai // Update nodes memory state for _, nodeID := range maskBits { machineState[nodeID].NumberOfAssignments++ - machineState[nodeID].Nodes = maskBits + machineState[nodeID].Cells = maskBits // we need to continue to update all affinity mask nodes if requestedSize == 0 { @@ -202,7 +202,7 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa // once we do not have any memory allocations on this node, clear node groups if machineState[nodeID].NumberOfAssignments == 0 { - machineState[nodeID].Nodes = []int{nodeID} + machineState[nodeID].Cells = []int{nodeID} } // we still need to pass over all NUMA node under the affinity mask to update them @@ -390,7 +390,7 @@ func (p *staticPolicy) calculateHints(s state.State, requestedResources map[v1.R } sort.Ints(numaNodes) - // Initialize minAffinitySize to include all NUMA Nodes. + // Initialize minAffinitySize to include all NUMA Cells. minAffinitySize := len(numaNodes) hints := map[string][]topologymanager.TopologyHint{} @@ -399,7 +399,7 @@ func (p *staticPolicy) calculateHints(s state.State, requestedResources map[v1.R singleNUMAHint := len(maskBits) == 1 // the node already in group with another node, it can not be used for the single NUMA node allocation - if singleNUMAHint && len(machineState[maskBits[0]].Nodes) > 1 { + if singleNUMAHint && len(machineState[maskBits[0]].Cells) > 1 { return } @@ -410,12 +410,12 @@ func (p *staticPolicy) calculateHints(s state.State, requestedResources map[v1.R // the node already used for the memory allocation if !singleNUMAHint && machineState[nodeID].NumberOfAssignments > 0 { // the node used for the single NUMA memory allocation, it can not be used for the multi NUMA node allocation - if len(machineState[nodeID].Nodes) == 1 { + if len(machineState[nodeID].Cells) == 1 { return } // the node already used with different group of nodes, it can not be use with in the current hint - if !areGroupsEqual(machineState[nodeID].Nodes, maskBits) { + if !areGroupsEqual(machineState[nodeID].Cells, maskBits) { return } } @@ -524,7 +524,7 @@ func (p *staticPolicy) validateState(s state.State) error { } nodeState.NumberOfAssignments++ - nodeState.Nodes = b.NUMAAffinity + nodeState.Cells = b.NUMAAffinity memoryState, ok := nodeState.MemoryMap[b.Type] if !ok { @@ -568,7 +568,7 @@ func (p *staticPolicy) validateState(s state.State) error { return nil } -func areMachineStatesEqual(ms1, ms2 state.NodeMap) bool { +func areMachineStatesEqual(ms1, ms2 state.NUMANodeMap) bool { if len(ms1) != len(ms2) { klog.Errorf("[memorymanager] node states are different len(ms1) != len(ms2): %d != %d", len(ms1), len(ms2)) return false @@ -586,8 +586,8 @@ func areMachineStatesEqual(ms1, ms2 state.NodeMap) bool { return false } - if !areGroupsEqual(nodeState1.Nodes, nodeState2.Nodes) { - klog.Errorf("[memorymanager] node states groups are different: %v != %v", nodeState1.Nodes, nodeState2.Nodes) + if !areGroupsEqual(nodeState1.Cells, nodeState2.Cells) { + klog.Errorf("[memorymanager] node states groups are different: %v != %v", nodeState1.Cells, nodeState2.Cells) return false } @@ -612,14 +612,14 @@ func areMachineStatesEqual(ms1, ms2 state.NodeMap) bool { return true } -func (p *staticPolicy) getDefaultMachineState() state.NodeMap { - defaultMachineState := state.NodeMap{} +func (p *staticPolicy) getDefaultMachineState() state.NUMANodeMap { + defaultMachineState := state.NUMANodeMap{} nodeHugepages := 
map[int]uint64{} for _, node := range p.machineInfo.Topology { - defaultMachineState[node.Id] = &state.NodeState{ + defaultMachineState[node.Id] = &state.NUMANodeState{ NumberOfAssignments: 0, MemoryMap: map[v1.ResourceName]*state.MemoryTable{}, - Nodes: []int{node.Id}, + Cells: []int{node.Id}, } // fill memory table with huge pages values @@ -681,7 +681,7 @@ func (p *staticPolicy) getDefaultHint(s state.State, requestedResources map[v1.R return findBestHint(hints[string(v1.ResourceMemory)]), nil } -func isAffinitySatisfyRequest(machineState state.NodeMap, mask bitmask.BitMask, requestedResources map[v1.ResourceName]uint64) bool { +func isAffinitySatisfyRequest(machineState state.NUMANodeMap, mask bitmask.BitMask, requestedResources map[v1.ResourceName]uint64) bool { totalFreeSize := map[v1.ResourceName]uint64{} for _, nodeID := range mask.GetBits() { for resourceName := range requestedResources { diff --git a/pkg/kubelet/cm/memorymanager/policy_static_test.go b/pkg/kubelet/cm/memorymanager/policy_static_test.go index 49bea36a4843..612e2417de48 100644 --- a/pkg/kubelet/cm/memorymanager/policy_static_test.go +++ b/pkg/kubelet/cm/memorymanager/policy_static_test.go @@ -119,8 +119,8 @@ type testStaticPolicy struct { description string assignments state.ContainerMemoryAssignments expectedAssignments state.ContainerMemoryAssignments - machineState state.NodeMap - expectedMachineState state.NodeMap + machineState state.NUMANodeMap + expectedMachineState state.NUMANodeMap systemReserved systemReservedMemory expectedError error machineInfo *cadvisorapi.MachineInfo @@ -229,8 +229,8 @@ func TestStaticPolicyStart(t *testing.T) { { description: "should fill the state with default values, when the state is empty", expectedAssignments: state.ContainerMemoryAssignments{}, - expectedMachineState: state.NodeMap{ - 0: &state.NodeState{ + expectedMachineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -248,7 +248,7 @@ func TestStaticPolicyStart(t *testing.T) { }, }, NumberOfAssignments: 0, - Nodes: []int{0}, + Cells: []int{0}, }, }, systemReserved: systemReservedMemory{ @@ -274,8 +274,8 @@ func TestStaticPolicyStart(t *testing.T) { }, { description: "should fail when machine state does not have all NUMA nodes", - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -292,7 +292,7 @@ func TestStaticPolicyStart(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{0}, + Cells: []int{0}, NumberOfAssignments: 0, }, }, @@ -331,8 +331,8 @@ func TestStaticPolicyStart(t *testing.T) { }, { description: "should fail when machine state does not have memory resource", - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ hugepages1Gi: { Allocatable: gb, @@ -342,7 +342,7 @@ func TestStaticPolicyStart(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{0}, + Cells: []int{0}, NumberOfAssignments: 0, }, }, @@ -370,8 +370,8 @@ func TestStaticPolicyStart(t *testing.T) { }, { description: "should fail when machine state has wrong size of total memory", - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 
* mb, @@ -381,7 +381,7 @@ func TestStaticPolicyStart(t *testing.T) { TotalMemSize: 1536 * mb, }, }, - Nodes: []int{0}, + Cells: []int{0}, NumberOfAssignments: 0, }, }, @@ -409,8 +409,8 @@ func TestStaticPolicyStart(t *testing.T) { }, { description: "should fail when machine state has wrong size of system reserved memory", - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -420,7 +420,7 @@ func TestStaticPolicyStart(t *testing.T) { TotalMemSize: 2 * gb, }, }, - Nodes: []int{0}, + Cells: []int{0}, NumberOfAssignments: 0, }, }, @@ -459,8 +459,8 @@ func TestStaticPolicyStart(t *testing.T) { }, }, }, - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -470,7 +470,7 @@ func TestStaticPolicyStart(t *testing.T) { TotalMemSize: 2 * gb, }, }, - Nodes: []int{0}, + Cells: []int{0}, NumberOfAssignments: 1, }, }, @@ -498,8 +498,8 @@ func TestStaticPolicyStart(t *testing.T) { }, { description: "should fail when machine state has wrong size of hugepages", - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -516,7 +516,7 @@ func TestStaticPolicyStart(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{0}, + Cells: []int{0}, NumberOfAssignments: 0, }, }, @@ -544,8 +544,8 @@ func TestStaticPolicyStart(t *testing.T) { }, { description: "should fail when machine state has wrong size of system reserved hugepages", - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -562,7 +562,7 @@ func TestStaticPolicyStart(t *testing.T) { TotalMemSize: 2 * gb, }, }, - Nodes: []int{0}, + Cells: []int{0}, NumberOfAssignments: 0, }, }, @@ -610,8 +610,8 @@ func TestStaticPolicyStart(t *testing.T) { }, }, }, - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -628,7 +628,7 @@ func TestStaticPolicyStart(t *testing.T) { TotalMemSize: 4 * gb, }, }, - Nodes: []int{0}, + Cells: []int{0}, NumberOfAssignments: 2, }, }, @@ -667,8 +667,8 @@ func TestStaticPolicyStart(t *testing.T) { }, }, }, - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -685,7 +685,7 @@ func TestStaticPolicyStart(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{0}, + Cells: []int{0}, NumberOfAssignments: 0, }, }, @@ -729,8 +729,8 @@ func TestStaticPolicyStart(t *testing.T) { }, }, }, - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -747,7 +747,7 @@ func TestStaticPolicyStart(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{0}, + Cells: []int{0}, NumberOfAssignments: 2, }, }, @@ -791,8 +791,8 @@ func TestStaticPolicyStart(t 
*testing.T) { }, }, }, - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -809,7 +809,7 @@ func TestStaticPolicyStart(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{0}, + Cells: []int{0}, NumberOfAssignments: 1, }, }, @@ -897,8 +897,8 @@ func TestStaticPolicyStart(t *testing.T) { }, }, }, - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 640 * mb, @@ -915,10 +915,10 @@ func TestStaticPolicyStart(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{0, 1}, + Cells: []int{0, 1}, NumberOfAssignments: 4, }, - 1: &state.NodeState{ + 1: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 640 * mb, @@ -935,12 +935,12 @@ func TestStaticPolicyStart(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{0, 1}, + Cells: []int{0, 1}, NumberOfAssignments: 4, }, }, - expectedMachineState: state.NodeMap{ - 0: &state.NodeState{ + expectedMachineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 640 * mb, @@ -957,10 +957,10 @@ func TestStaticPolicyStart(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{0, 1}, + Cells: []int{0, 1}, NumberOfAssignments: 4, }, - 1: &state.NodeState{ + 1: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 640 * mb, @@ -977,7 +977,7 @@ func TestStaticPolicyStart(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{0, 1}, + Cells: []int{0, 1}, NumberOfAssignments: 4, }, }, @@ -1053,8 +1053,8 @@ func TestStaticPolicyAllocate(t *testing.T) { { description: "should do nothing for non-guaranteed pods", expectedAssignments: state.ContainerMemoryAssignments{}, - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -1071,11 +1071,11 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{}, + Cells: []int{}, }, }, - expectedMachineState: state.NodeMap{ - 0: &state.NodeState{ + expectedMachineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -1092,7 +1092,7 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{}, + Cells: []int{}, }, }, systemReserved: systemReservedMemory{ @@ -1128,8 +1128,8 @@ func TestStaticPolicyAllocate(t *testing.T) { }, }, }, - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -1146,11 +1146,11 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{}, + Cells: []int{}, }, }, - expectedMachineState: state.NodeMap{ - 0: &state.NodeState{ + expectedMachineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -1167,7 +1167,7 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{}, + Cells: []int{}, }, }, 
systemReserved: systemReservedMemory{ @@ -1198,8 +1198,8 @@ func TestStaticPolicyAllocate(t *testing.T) { }, }, }, - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -1216,11 +1216,11 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{0}, + Cells: []int{0}, }, }, - expectedMachineState: state.NodeMap{ - 0: &state.NodeState{ + expectedMachineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -1237,7 +1237,7 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{0}, + Cells: []int{0}, NumberOfAssignments: 2, }, }, @@ -1267,8 +1267,8 @@ func TestStaticPolicyAllocate(t *testing.T) { }, }, }, - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -1285,7 +1285,7 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{0}, + Cells: []int{0}, NumberOfAssignments: 2, }, }, @@ -1311,8 +1311,8 @@ func TestStaticPolicyAllocate(t *testing.T) { }, }, }, - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: gb, @@ -1329,10 +1329,10 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{0}, + Cells: []int{0}, NumberOfAssignments: 1, }, - 1: &state.NodeState{ + 1: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 512 * mb, @@ -1349,10 +1349,10 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{1}, + Cells: []int{1}, NumberOfAssignments: 0, }, - 2: &state.NodeState{ + 2: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 512 * mb, @@ -1369,7 +1369,7 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{2}, + Cells: []int{2}, NumberOfAssignments: 0, }, }, @@ -1390,8 +1390,8 @@ func TestStaticPolicyAllocate(t *testing.T) { }, { description: "should fail when NUMA affinity provided under the topology manager hint did not satisfy container requirements and extended hint generation failed", - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 512 * mb, @@ -1408,10 +1408,10 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{0}, + Cells: []int{0}, NumberOfAssignments: 0, }, - 1: &state.NodeState{ + 1: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -1428,10 +1428,10 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{1, 2}, + Cells: []int{1, 2}, NumberOfAssignments: 1, }, - 2: &state.NodeState{ + 2: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -1448,7 +1448,7 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{1, 2}, + Cells: []int{1, 2}, 
NumberOfAssignments: 1, }, }, @@ -1480,8 +1480,8 @@ func TestStaticPolicyAllocate(t *testing.T) { }, }, }, - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: gb, @@ -1498,10 +1498,10 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{0}, + Cells: []int{0}, NumberOfAssignments: 1, }, - 1: &state.NodeState{ + 1: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 512 * mb, @@ -1518,10 +1518,10 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{1}, + Cells: []int{1}, NumberOfAssignments: 0, }, - 2: &state.NodeState{ + 2: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 512 * mb, @@ -1538,7 +1538,7 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{2}, + Cells: []int{2}, NumberOfAssignments: 0, }, }, @@ -1576,8 +1576,8 @@ func TestStaticPolicyAllocate(t *testing.T) { }, }, }, - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 512 * mb, @@ -1594,10 +1594,10 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{0}, + Cells: []int{0}, NumberOfAssignments: 0, }, - 1: &state.NodeState{ + 1: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 512 * mb, @@ -1614,10 +1614,10 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{1}, + Cells: []int{1}, NumberOfAssignments: 0, }, - 2: &state.NodeState{ + 2: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 512 * mb, @@ -1634,10 +1634,10 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{2}, + Cells: []int{2}, NumberOfAssignments: 0, }, - 3: &state.NodeState{ + 3: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 512 * mb, @@ -1654,12 +1654,12 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{3}, + Cells: []int{3}, NumberOfAssignments: 0, }, }, - expectedMachineState: state.NodeMap{ - 0: &state.NodeState{ + expectedMachineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 512 * mb, @@ -1676,10 +1676,10 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{0, 1}, + Cells: []int{0, 1}, NumberOfAssignments: 2, }, - 1: &state.NodeState{ + 1: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 512 * mb, @@ -1696,10 +1696,10 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{0, 1}, + Cells: []int{0, 1}, NumberOfAssignments: 2, }, - 2: &state.NodeState{ + 2: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 512 * mb, @@ -1716,10 +1716,10 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{2}, + Cells: []int{2}, NumberOfAssignments: 0, }, - 3: &state.NodeState{ + 3: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ 
v1.ResourceMemory: { Allocatable: 512 * mb, @@ -1736,7 +1736,7 @@ func TestStaticPolicyAllocate(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{3}, + Cells: []int{3}, NumberOfAssignments: 0, }, }, @@ -1794,8 +1794,8 @@ func TestStaticPolicyRemoveContainer(t *testing.T) { { description: "should do nothing when the container does not exist under the state", expectedAssignments: state.ContainerMemoryAssignments{}, - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -1812,11 +1812,11 @@ func TestStaticPolicyRemoveContainer(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{}, + Cells: []int{}, }, }, - expectedMachineState: state.NodeMap{ - 0: &state.NodeState{ + expectedMachineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -1833,7 +1833,7 @@ func TestStaticPolicyRemoveContainer(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{}, + Cells: []int{}, }, }, systemReserved: systemReservedMemory{ @@ -1861,8 +1861,8 @@ func TestStaticPolicyRemoveContainer(t *testing.T) { }, }, expectedAssignments: state.ContainerMemoryAssignments{}, - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -1880,11 +1880,11 @@ func TestStaticPolicyRemoveContainer(t *testing.T) { }, }, NumberOfAssignments: 2, - Nodes: []int{0}, + Cells: []int{0}, }, }, - expectedMachineState: state.NodeMap{ - 0: &state.NodeState{ + expectedMachineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -1901,7 +1901,7 @@ func TestStaticPolicyRemoveContainer(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{0}, + Cells: []int{0}, NumberOfAssignments: 0, }, }, @@ -1930,8 +1930,8 @@ func TestStaticPolicyRemoveContainer(t *testing.T) { }, }, expectedAssignments: state.ContainerMemoryAssignments{}, - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 512 * mb, @@ -1949,9 +1949,9 @@ func TestStaticPolicyRemoveContainer(t *testing.T) { }, }, NumberOfAssignments: 2, - Nodes: []int{0, 1}, + Cells: []int{0, 1}, }, - 1: &state.NodeState{ + 1: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 512 * mb, @@ -1969,11 +1969,11 @@ func TestStaticPolicyRemoveContainer(t *testing.T) { }, }, NumberOfAssignments: 2, - Nodes: []int{0, 1}, + Cells: []int{0, 1}, }, }, - expectedMachineState: state.NodeMap{ - 0: &state.NodeState{ + expectedMachineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 512 * mb, @@ -1991,9 +1991,9 @@ func TestStaticPolicyRemoveContainer(t *testing.T) { }, }, NumberOfAssignments: 0, - Nodes: []int{0}, + Cells: []int{0}, }, - 1: &state.NodeState{ + 1: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 512 * mb, @@ -2011,7 +2011,7 @@ func TestStaticPolicyRemoveContainer(t *testing.T) { }, }, NumberOfAssignments: 0, - Nodes: []int{1}, + Cells: []int{1}, }, }, 
systemReserved: systemReservedMemory{ @@ -2123,8 +2123,8 @@ func TestStaticPolicyGetTopologyHints(t *testing.T) { }, }, }, - machineState: state.NodeMap{ - 0: &state.NodeState{ + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -2141,10 +2141,10 @@ func TestStaticPolicyGetTopologyHints(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{0, 1}, + Cells: []int{0, 1}, NumberOfAssignments: 2, }, - 1: &state.NodeState{ + 1: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -2161,10 +2161,10 @@ func TestStaticPolicyGetTopologyHints(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{0, 1}, + Cells: []int{0, 1}, NumberOfAssignments: 2, }, - 2: &state.NodeState{ + 2: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -2181,10 +2181,10 @@ func TestStaticPolicyGetTopologyHints(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{2}, + Cells: []int{2}, NumberOfAssignments: 0, }, - 3: &state.NodeState{ + 3: &state.NUMANodeState{ MemoryMap: map[v1.ResourceName]*state.MemoryTable{ v1.ResourceMemory: { Allocatable: 1536 * mb, @@ -2201,7 +2201,7 @@ func TestStaticPolicyGetTopologyHints(t *testing.T) { TotalMemSize: gb, }, }, - Nodes: []int{3}, + Cells: []int{3}, NumberOfAssignments: 0, }, }, diff --git a/pkg/kubelet/cm/memorymanager/state/checkpoint.go b/pkg/kubelet/cm/memorymanager/state/checkpoint.go index 918bbfb67634..a43b40272e02 100644 --- a/pkg/kubelet/cm/memorymanager/state/checkpoint.go +++ b/pkg/kubelet/cm/memorymanager/state/checkpoint.go @@ -28,7 +28,7 @@ var _ checkpointmanager.Checkpoint = &MemoryManagerCheckpoint{} // MemoryManagerCheckpoint struct is used to store memory/pod assignments in a checkpoint type MemoryManagerCheckpoint struct { PolicyName string `json:"policyName"` - MachineState NodeMap `json:"machineState"` + MachineState NUMANodeMap `json:"machineState"` Entries ContainerMemoryAssignments `json:"entries,omitempty"` Checksum checksum.Checksum `json:"checksum"` } @@ -38,7 +38,7 @@ func NewMemoryManagerCheckpoint() *MemoryManagerCheckpoint { //lint:ignore unexported-type-in-api user-facing error message return &MemoryManagerCheckpoint{ Entries: ContainerMemoryAssignments{}, - MachineState: NodeMap{}, + MachineState: NUMANodeMap{}, } } diff --git a/pkg/kubelet/cm/memorymanager/state/state.go b/pkg/kubelet/cm/memorymanager/state/state.go index 00a6f2a87575..322ca608e4ca 100644 --- a/pkg/kubelet/cm/memorymanager/state/state.go +++ b/pkg/kubelet/cm/memorymanager/state/state.go @@ -29,35 +29,35 @@ type MemoryTable struct { Free uint64 `json:"free"` } -// NodeState contains NUMA node related information -type NodeState struct { +// NUMANodeState contains NUMA node related information +type NUMANodeState struct { // NumberOfAssignments contains a number memory assignments from this node // When the container requires memory and hugepages it will increase number of assignments by two NumberOfAssignments int `json:"numberOfAssignments"` // MemoryTable contains NUMA node memory related information MemoryMap map[v1.ResourceName]*MemoryTable `json:"memoryMap"` - // Nodes contains the current NUMA node and all other nodes that are in a group with current NUMA node + // Cells contains the current NUMA node and all other nodes that are in a group with current NUMA node // This parameter indicates if the current node is used for the 
multiple NUMA node memory allocation // For example if some container has pinning 0,1,2, NUMA nodes 0,1,2 under the state will have // this parameter equals to [0, 1, 2] - Nodes []int `json:"nodes"` + Cells []int `json:"cells"` } -// NodeMap contains memory information for each NUMA node. -type NodeMap map[int]*NodeState +// NUMANodeMap contains memory information for each NUMA node. +type NUMANodeMap map[int]*NUMANodeState -// Clone returns a copy of NodeMap -func (nm NodeMap) Clone() NodeMap { - clone := make(NodeMap) +// Clone returns a copy of NUMANodeMap +func (nm NUMANodeMap) Clone() NUMANodeMap { + clone := make(NUMANodeMap) for node, s := range nm { if s == nil { clone[node] = nil continue } - clone[node] = &NodeState{} + clone[node] = &NUMANodeState{} clone[node].NumberOfAssignments = s.NumberOfAssignments - clone[node].Nodes = append([]int{}, s.Nodes...) + clone[node].Cells = append([]int{}, s.Cells...) if s.MemoryMap == nil { continue @@ -103,7 +103,7 @@ func (as ContainerMemoryAssignments) Clone() ContainerMemoryAssignments { // Reader interface used to read current memory/pod assignment state type Reader interface { // GetMachineState returns Memory Map stored in the State - GetMachineState() NodeMap + GetMachineState() NUMANodeMap // GetMemoryBlocks returns memory assignments of a container GetMemoryBlocks(podUID string, containerName string) []Block // GetMemoryAssignments returns ContainerMemoryAssignments @@ -111,8 +111,8 @@ type Reader interface { } type writer interface { - // SetMachineState stores NodeMap in State - SetMachineState(memoryMap NodeMap) + // SetMachineState stores NUMANodeMap in State + SetMachineState(memoryMap NUMANodeMap) // SetMemoryBlocks stores memory assignments of a container SetMemoryBlocks(podUID string, containerName string, blocks []Block) // SetMemoryAssignments sets ContainerMemoryAssignments by using the passed parameter diff --git a/pkg/kubelet/cm/memorymanager/state/state_checkpoint.go b/pkg/kubelet/cm/memorymanager/state/state_checkpoint.go index fd3dabd6d54d..303501f7b15e 100644 --- a/pkg/kubelet/cm/memorymanager/state/state_checkpoint.go +++ b/pkg/kubelet/cm/memorymanager/state/state_checkpoint.go @@ -103,7 +103,7 @@ func (sc *stateCheckpoint) storeState() error { } // GetMemoryState returns Memory Map stored in the State -func (sc *stateCheckpoint) GetMachineState() NodeMap { +func (sc *stateCheckpoint) GetMachineState() NUMANodeMap { sc.RLock() defer sc.RUnlock() @@ -126,8 +126,8 @@ func (sc *stateCheckpoint) GetMemoryAssignments() ContainerMemoryAssignments { return sc.cache.GetMemoryAssignments() } -// SetMachineState stores NodeMap in State -func (sc *stateCheckpoint) SetMachineState(memoryMap NodeMap) { +// SetMachineState stores NUMANodeMap in State +func (sc *stateCheckpoint) SetMachineState(memoryMap NUMANodeMap) { sc.Lock() defer sc.Unlock() diff --git a/pkg/kubelet/cm/memorymanager/state/state_checkpoint_test.go b/pkg/kubelet/cm/memorymanager/state/state_checkpoint_test.go index f0a762026599..a72918719890 100644 --- a/pkg/kubelet/cm/memorymanager/state/state_checkpoint_test.go +++ b/pkg/kubelet/cm/memorymanager/state/state_checkpoint_test.go @@ -62,9 +62,9 @@ func TestCheckpointStateRestore(t *testing.T) { "Restore valid checkpoint", `{ "policyName":"static", - "machineState":{"0":{"numberOfAssignments":0,"memoryMap":{"memory":{"total":2048,"systemReserved":512,"allocatable":1536,"reserved":512,"free":1024}},"nodes":[]}}, + 
"machineState":{"0":{"numberOfAssignments":0,"memoryMap":{"memory":{"total":2048,"systemReserved":512,"allocatable":1536,"reserved":512,"free":1024}},"cells":[]}}, "entries":{"pod":{"container1":[{"numaAffinity":[0],"type":"memory","size":512}]}}, - "checksum": 163710462 + "checksum": 4215593881 }`, containermap.ContainerMap{}, "", @@ -80,8 +80,8 @@ func TestCheckpointStateRestore(t *testing.T) { }, }, }, - machineState: NodeMap{ - 0: &NodeState{ + machineState: NUMANodeMap{ + 0: &NUMANodeState{ MemoryMap: map[v1.ResourceName]*MemoryTable{ v1.ResourceMemory: { Allocatable: 1536, @@ -99,7 +99,7 @@ func TestCheckpointStateRestore(t *testing.T) { "Restore checkpoint with invalid checksum", `{ "policyName":"static", - "machineState":{"0":{"numberOfAssignments":0,"memoryMap":{"memory":{"total":2048,"systemReserved":512,"allocatable":1536,"reserved":512,"free":1024}},"nodes":[]}}, + "machineState":{"0":{"numberOfAssignments":0,"memoryMap":{"memory":{"total":2048,"systemReserved":512,"allocatable":1536,"reserved":512,"free":1024}},"cells":[]}}, "entries":{"pod":{"container1":[{"affinity":[0],"type":"memory","size":512}]}}, "checksum": 101010 }`, @@ -164,8 +164,8 @@ func TestCheckpointStateStore(t *testing.T) { }, }, }, - machineState: NodeMap{ - 0: &NodeState{ + machineState: NUMANodeMap{ + 0: &NUMANodeState{ MemoryMap: map[v1.ResourceName]*MemoryTable{ v1.ResourceMemory: { Allocatable: 1536, @@ -208,7 +208,7 @@ func TestCheckpointStateStore(t *testing.T) { func TestCheckpointStateHelpers(t *testing.T) { testCases := []struct { description string - machineState NodeMap + machineState NUMANodeMap assignments ContainerMemoryAssignments }{ { @@ -224,8 +224,8 @@ func TestCheckpointStateHelpers(t *testing.T) { }, }, }, - machineState: NodeMap{ - 0: &NodeState{ + machineState: NUMANodeMap{ + 0: &NUMANodeState{ MemoryMap: map[v1.ResourceName]*MemoryTable{ v1.ResourceMemory: { Allocatable: 1536, @@ -235,7 +235,7 @@ func TestCheckpointStateHelpers(t *testing.T) { TotalMemSize: 2048, }, }, - Nodes: []int{}, + Cells: []int{}, }, }, }, @@ -259,8 +259,8 @@ func TestCheckpointStateHelpers(t *testing.T) { }, }, }, - machineState: NodeMap{ - 0: &NodeState{ + machineState: NUMANodeMap{ + 0: &NUMANodeState{ MemoryMap: map[v1.ResourceName]*MemoryTable{ v1.ResourceMemory: { Allocatable: 1536, @@ -270,7 +270,7 @@ func TestCheckpointStateHelpers(t *testing.T) { TotalMemSize: 2048, }, }, - Nodes: []int{}, + Cells: []int{}, }, }, }, @@ -281,8 +281,8 @@ func TestCheckpointStateHelpers(t *testing.T) { "container1": {}, }, }, - machineState: NodeMap{ - 0: &NodeState{ + machineState: NUMANodeMap{ + 0: &NUMANodeState{ MemoryMap: map[v1.ResourceName]*MemoryTable{ v1.ResourceMemory: { Allocatable: 1536, @@ -292,7 +292,7 @@ func TestCheckpointStateHelpers(t *testing.T) { TotalMemSize: 2048, }, }, - Nodes: []int{}, + Cells: []int{}, }, }, }, @@ -335,7 +335,7 @@ func TestCheckpointStateHelpers(t *testing.T) { func TestCheckpointStateClear(t *testing.T) { testCases := []struct { description string - machineState NodeMap + machineState NUMANodeMap assignments ContainerMemoryAssignments }{ { @@ -351,8 +351,8 @@ func TestCheckpointStateClear(t *testing.T) { }, }, }, - machineState: NodeMap{ - 0: &NodeState{ + machineState: NUMANodeMap{ + 0: &NUMANodeState{ MemoryMap: map[v1.ResourceName]*MemoryTable{ v1.ResourceMemory: { Allocatable: 1536, @@ -383,7 +383,7 @@ func TestCheckpointStateClear(t *testing.T) { state.SetMemoryAssignments(tc.assignments) state.ClearState() - assert.Equal(t, NodeMap{}, state.GetMachineState(), "cleared state 
with non-empty machine state") + assert.Equal(t, NUMANodeMap{}, state.GetMachineState(), "cleared state with non-empty machine state") assert.Equal(t, ContainerMemoryAssignments{}, state.GetMemoryAssignments(), "cleared state with non-empty memory assignments") }) } diff --git a/pkg/kubelet/cm/memorymanager/state/state_mem.go b/pkg/kubelet/cm/memorymanager/state/state_mem.go index a84dabcf27a1..119e4eb8a122 100644 --- a/pkg/kubelet/cm/memorymanager/state/state_mem.go +++ b/pkg/kubelet/cm/memorymanager/state/state_mem.go @@ -25,7 +25,7 @@ import ( type stateMemory struct { sync.RWMutex assignments ContainerMemoryAssignments - machineState NodeMap + machineState NUMANodeMap } var _ State = &stateMemory{} @@ -35,12 +35,12 @@ func NewMemoryState() State { klog.Infof("[memorymanager] initializing new in-memory state store") return &stateMemory{ assignments: ContainerMemoryAssignments{}, - machineState: NodeMap{}, + machineState: NUMANodeMap{}, } } // GetMemoryState returns Memory Map stored in the State -func (s *stateMemory) GetMachineState() NodeMap { +func (s *stateMemory) GetMachineState() NUMANodeMap { s.RLock() defer s.RUnlock() @@ -66,8 +66,8 @@ func (s *stateMemory) GetMemoryAssignments() ContainerMemoryAssignments { return s.assignments.Clone() } -// SetMachineState stores NodeMap in State -func (s *stateMemory) SetMachineState(nodeMap NodeMap) { +// SetMachineState stores NUMANodeMap in State +func (s *stateMemory) SetMachineState(nodeMap NUMANodeMap) { s.Lock() defer s.Unlock() @@ -117,7 +117,7 @@ func (s *stateMemory) ClearState() { s.Lock() defer s.Unlock() - s.machineState = NodeMap{} + s.machineState = NUMANodeMap{} s.assignments = make(ContainerMemoryAssignments) klog.V(2).Infof("[memorymanager] cleared state") } From f3d4ac2f7e9cbb851f1467accc6e0c61f9cbda5e Mon Sep 17 00:00:00 2001 From: Artyom Lukianov Date: Thu, 15 Oct 2020 21:00:38 +0300 Subject: [PATCH 21/31] memory manager: add basice e2e tests Provides basic tests e2e to verify that pod succeeds to start with MemoryManager enabled. Verifies both MemoryManager policies and when the node has multiple NUMA nodes it will verify the memory pinning. Signed-off-by: Artyom Lukianov --- test/e2e_node/BUILD | 2 + test/e2e_node/memory_manager_test.go | 363 +++++++++++++++++++++++++++ 2 files changed, 365 insertions(+) create mode 100644 test/e2e_node/memory_manager_test.go diff --git a/test/e2e_node/BUILD b/test/e2e_node/BUILD index 3425487b5299..c88d037abbe5 100644 --- a/test/e2e_node/BUILD +++ b/test/e2e_node/BUILD @@ -128,6 +128,7 @@ go_test( "hugepages_test.go", "image_id_test.go", "log_path_test.go", + "memory_manager_test.go", "mirror_pod_grace_period_test.go", "mirror_pod_test.go", "node_container_manager_test.go", @@ -226,6 +227,7 @@ go_test( "//vendor/github.com/onsi/gomega/types:go_default_library", "//vendor/github.com/prometheus/common/model:go_default_library", "//vendor/k8s.io/klog/v2:go_default_library", + "//vendor/k8s.io/utils/pointer:go_default_library", ] + select({ "@io_bazel_rules_go//go/platform:android": [ "//pkg/kubelet/stats/pidlimit:go_default_library", diff --git a/test/e2e_node/memory_manager_test.go b/test/e2e_node/memory_manager_test.go new file mode 100644 index 000000000000..96a24653d6f8 --- /dev/null +++ b/test/e2e_node/memory_manager_test.go @@ -0,0 +1,363 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2enode + +import ( + "fmt" + "os/exec" + "regexp" + "sort" + "strconv" + "strings" + "time" + + "k8s.io/klog/v2" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" + "k8s.io/kubernetes/pkg/kubelet/cm/cpuset" + "k8s.io/kubernetes/test/e2e/framework" + e2enode "k8s.io/kubernetes/test/e2e/framework/node" + e2epod "k8s.io/kubernetes/test/e2e/framework/pod" + "k8s.io/utils/pointer" + + "github.com/onsi/ginkgo" + "github.com/onsi/gomega" +) + +const ( + evictionHardMemory = "memory.available" + memoryManagerStateFile = "/var/lib/kubelet/memory_manager_state" + reservedLimit = "limit" + reservedNUMANode = "numa-node" + reservedType = "type" + resourceMemory = "memory" + staticPolicy = "static" + nonePolicy = "none" +) + +// Helper for makeMemoryManagerPod(). +type memoryManagerCtnAttributes struct { + ctnName string + cpus string + memory string + hugepages2Mi string +} + +// makeCPUMangerPod returns a pod with the provided ctnAttributes. +func makeMemoryManagerPod(podName string, ctnAttributes []memoryManagerCtnAttributes) *v1.Pod { + hugepagesMount := false + var containers []v1.Container + for _, ctnAttr := range ctnAttributes { + memsetCmd := fmt.Sprintf("grep Mems_allowed_list /proc/self/status | cut -f2 && sleep 1d") + ctn := v1.Container{ + Name: ctnAttr.ctnName, + Image: busyboxImage, + Resources: v1.ResourceRequirements{ + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse(ctnAttr.cpus), + v1.ResourceMemory: resource.MustParse(ctnAttr.memory), + }, + }, + Command: []string{"sh", "-c", memsetCmd}, + } + if ctnAttr.hugepages2Mi != "" { + hugepagesMount = true + + ctn.Resources.Limits[hugepagesResourceName2Mi] = resource.MustParse(ctnAttr.hugepages2Mi) + ctn.VolumeMounts = []v1.VolumeMount{ + { + Name: "hugepages-2mi", + MountPath: "/hugepages-2Mi", + }, + } + } + + containers = append(containers, ctn) + } + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + }, + Spec: v1.PodSpec{ + RestartPolicy: v1.RestartPolicyNever, + Containers: containers, + }, + } + + if hugepagesMount { + pod.Spec.Volumes = []v1.Volume{ + { + Name: "hugepages-2mi", + VolumeSource: v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{ + Medium: mediumHugepages2Mi, + }, + }, + }, + } + } + + return pod +} + +func deleteMemoryManagerStateFile() { + err := exec.Command("/bin/sh", "-c", fmt.Sprintf("rm -f %s", memoryManagerStateFile)).Run() + framework.ExpectNoError(err, "failed to delete the state file") +} + +type kubeletParams struct { + memoryManagerFeatureGate bool + memoryManagerPolicy string + systemReservedMemory []map[string]string + systemReserved map[string]string + kubeReserved map[string]string + evictionHard map[string]string +} + +func getUpdatedKubeletConfig(oldCfg *kubeletconfig.KubeletConfiguration, params *kubeletParams) *kubeletconfig.KubeletConfiguration { + newCfg := oldCfg.DeepCopy() + + if newCfg.FeatureGates == nil { + newCfg.FeatureGates = map[string]bool{} + } + newCfg.FeatureGates["MemoryManager"] = params.memoryManagerFeatureGate 
+ newCfg.MemoryManagerPolicy = params.memoryManagerPolicy + + // update system-reserved + if newCfg.SystemReserved == nil { + newCfg.SystemReserved = map[string]string{} + } + for resourceName, value := range params.systemReserved { + newCfg.SystemReserved[resourceName] = value + } + + // update kube-reserved + if newCfg.KubeReserved == nil { + newCfg.KubeReserved = map[string]string{} + } + for resourceName, value := range params.kubeReserved { + newCfg.KubeReserved[resourceName] = value + } + + // update hard eviction threshold + if newCfg.EvictionHard == nil { + newCfg.EvictionHard = map[string]string{} + } + for resourceName, value := range params.evictionHard { + newCfg.EvictionHard[resourceName] = value + } + + // update reserved memory + if newCfg.ReservedMemory == nil { + newCfg.ReservedMemory = []map[string]string{} + } + for _, p := range params.systemReservedMemory { + newCfg.ReservedMemory = append(newCfg.ReservedMemory, p) + } + + return newCfg +} + +func updateKubeletConfig(f *framework.Framework, cfg *kubeletconfig.KubeletConfiguration) { + // remove the state file + deleteMemoryManagerStateFile() + + // Update the Kubelet configuration + framework.ExpectNoError(setKubeletConfiguration(f, cfg)) + + // Wait for the Kubelet to be ready. + gomega.Eventually(func() bool { + nodes, err := e2enode.TotalReady(f.ClientSet) + framework.ExpectNoError(err) + return nodes == 1 + }, time.Minute, time.Second).Should(gomega.BeTrue()) +} + +func getAllNUMANodes() []int { + outData, err := exec.Command("/bin/sh", "-c", "lscpu").Output() + framework.ExpectNoError(err) + + numaNodeRegex, err := regexp.Compile(`NUMA node(\d+) CPU\(s\):`) + framework.ExpectNoError(err) + + matches := numaNodeRegex.FindAllSubmatch(outData, -1) + + var numaNodes []int + for _, m := range matches { + n, err := strconv.Atoi(string(m[1])) + framework.ExpectNoError(err) + + numaNodes = append(numaNodes, n) + } + + sort.Ints(numaNodes) + return numaNodes +} + +// Serial because the test updates kubelet configuration. 
+var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager][NodeAlphaFeature:MemoryManager]", func() {
+	// TODO: add more complex tests that will include interaction between CPUManager, MemoryManager and TopologyManager
+	var (
+		allNUMANodes            []int
+		ctnParams               []memoryManagerCtnAttributes
+		is2MiHugepagesSupported *bool
+		isMultiNUMASupported    *bool
+		kubeParams              *kubeletParams
+		oldCfg                  *kubeletconfig.KubeletConfiguration
+		testPod                 *v1.Pod
+	)
+
+	f := framework.NewDefaultFramework("memory-manager-test")
+	defaultKubeParams := &kubeletParams{
+		memoryManagerFeatureGate: true,
+		systemReservedMemory: []map[string]string{
+			{reservedNUMANode: "0", reservedType: resourceMemory, reservedLimit: "1100Mi"},
+		},
+		systemReserved: map[string]string{resourceMemory: "500Mi"},
+		kubeReserved:   map[string]string{resourceMemory: "500Mi"},
+		evictionHard:   map[string]string{evictionHardMemory: "100Mi"},
+	}
+
+	verifyMemoryPinning := func(numaNodeIDs []int) {
+		ginkgo.By("Verifying the NUMA pinning")
+
+		output, err := e2epod.GetPodLogs(f.ClientSet, f.Namespace.Name, testPod.Name, testPod.Spec.Containers[0].Name)
+		framework.ExpectNoError(err)
+
+		currentNUMANodeIDs, err := cpuset.Parse(strings.Trim(output, "\n"))
+		framework.ExpectNoError(err)
+
+		framework.ExpectEqual(numaNodeIDs, currentNUMANodeIDs.ToSlice())
+	}
+
+	ginkgo.BeforeEach(func() {
+		if isMultiNUMASupported == nil {
+			isMultiNUMASupported = pointer.BoolPtr(isMultiNUMA())
+		}
+
+		if is2MiHugepagesSupported == nil {
+			is2MiHugepagesSupported = pointer.BoolPtr(isHugePageAvailable(hugepagesSize2M))
+		}
+
+		if len(allNUMANodes) == 0 {
+			allNUMANodes = getAllNUMANodes()
+		}
+	})
+
+	// dynamically update the kubelet configuration
+	ginkgo.JustBeforeEach(func() {
+		var err error
+
+		// allocate hugepages
+		if *is2MiHugepagesSupported {
+			err := configureHugePages(hugepagesSize2M, 256)
+			framework.ExpectNoError(err)
+		}
+
+		// get the old kubelet config
+		oldCfg, err = getCurrentKubeletConfig()
+		framework.ExpectNoError(err)
+
+		// update the kubelet config with new parameters
+		newCfg := getUpdatedKubeletConfig(oldCfg, kubeParams)
+		updateKubeletConfig(f, newCfg)
+
+		// request hugepages resources for the containers
+		if *is2MiHugepagesSupported {
+			for i := 0; i < len(ctnParams); i++ {
+				ctnParams[i].hugepages2Mi = "128Mi"
+			}
+		}
+
+		testPod = makeMemoryManagerPod(ctnParams[0].ctnName, ctnParams)
+	})
+
+	ginkgo.JustAfterEach(func() {
+		// delete the test pod
+		f.PodClient().DeleteSync(testPod.Name, metav1.DeleteOptions{}, time.Minute)
+
+		// release hugepages
+		if err := configureHugePages(hugepagesSize2M, 0); err != nil {
+			klog.Errorf("failed to release hugepages: %v", err)
+		}
+
+		// update the kubelet config with the old values
+		updateKubeletConfig(f, oldCfg)
+	})
+
+	ginkgo.Context("with static policy", func() {
+		ginkgo.BeforeEach(func() {
+			// override kubelet configuration parameters
+			tmpParams := *defaultKubeParams
+			tmpParams.memoryManagerPolicy = staticPolicy
+			kubeParams = &tmpParams
+
+			// override pod parameters
+			ctnParams = []memoryManagerCtnAttributes{
+				{
+					ctnName: "memory-manager-static",
+					cpus:    "100m",
+					memory:  "128Mi",
+				},
+			}
+		})
+
+		ginkgo.It("should succeed to start the pod", func() {
+			ginkgo.By("Running the test pod")
+			testPod = f.PodClient().CreateSync(testPod)
+
+			// it makes no sense to verify NUMA pinning when the node has only one NUMA node
+			if !*isMultiNUMASupported {
+				return
+			}
+
+			verifyMemoryPinning([]int{0})
+		})
+	})
+
+	ginkgo.Context("with none policy", func() {
+		ginkgo.BeforeEach(func() {
+			tmpParams := *defaultKubeParams
+			tmpParams.memoryManagerPolicy = nonePolicy
+			kubeParams = &tmpParams
+
+			// override pod parameters
+			ctnParams = []memoryManagerCtnAttributes{
+				{
+					ctnName: "memory-manager-none",
+					cpus:    "100m",
+					memory:  "128Mi",
+				},
+			}
+		})
+
+		ginkgo.It("should succeed to start the pod", func() {
+			testPod = f.PodClient().CreateSync(testPod)
+
+			// it makes no sense to verify NUMA pinning when the node has only one NUMA node
+			if !*isMultiNUMASupported {
+				return
+			}
+
+			verifyMemoryPinning(allNUMANodes)
+		})
+	})
+})

From 606fea29f500bd7cc85801f4fffe51d4b63cf2ef Mon Sep 17 00:00:00 2001
From: Pawel Rapacz
Date: Wed, 4 Nov 2020 19:21:57 +0100
Subject: [PATCH 22/31] memory manager: add e2e test to run guaranteed pod
 with init containers

Signed-off-by: Pawel Rapacz
---
 test/e2e_node/memory_manager_test.go | 118 ++++++++++++++-------
 1 file changed, 88 insertions(+), 30 deletions(-)

diff --git a/test/e2e_node/memory_manager_test.go b/test/e2e_node/memory_manager_test.go
index 96a24653d6f8..3f9716cdded3 100644
--- a/test/e2e_node/memory_manager_test.go
+++ b/test/e2e_node/memory_manager_test.go
@@ -60,12 +60,11 @@ type memoryManagerCtnAttributes struct {
 	hugepages2Mi string
 }
 
-// makeCPUMangerPod returns a pod with the provided ctnAttributes.
-func makeMemoryManagerPod(podName string, ctnAttributes []memoryManagerCtnAttributes) *v1.Pod {
+// makeMemoryManagerContainers returns a slice of containers with the provided attributes and a flag that indicates whether a hugepages mount is needed for them.
+func makeMemoryManagerContainers(ctnCmd string, ctnAttributes []memoryManagerCtnAttributes) ([]v1.Container, bool) {
 	hugepagesMount := false
 	var containers []v1.Container
 	for _, ctnAttr := range ctnAttributes {
-		memsetCmd := fmt.Sprintf("grep Mems_allowed_list /proc/self/status | cut -f2 && sleep 1d")
 		ctn := v1.Container{
 			Name:  ctnAttr.ctnName,
 			Image: busyboxImage,
@@ -75,7 +74,7 @@ func makeMemoryManagerPod(podName string, ctnAttributes []memoryManagerCtnAttrib
 				v1.ResourceMemory: resource.MustParse(ctnAttr.memory),
 			},
 		},
-		Command: []string{"sh", "-c", memsetCmd},
+		Command: []string{"sh", "-c", ctnCmd},
 	}
 	if ctnAttr.hugepages2Mi != "" {
 		hugepagesMount = true
@@ -92,13 +91,28 @@ func makeMemoryManagerPod(podName string, ctnAttributes []memoryManagerCtnAttrib
 		containers = append(containers, ctn)
 	}
 
+	return containers, hugepagesMount
+}
+
+// makeMemoryManagerPod returns a pod with the provided ctnAttributes.
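+// The init containers run the command once and exit, while the app containers also
+// sleep, so the pod keeps running and its logs stay available for the NUMA pinning check.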
+func makeMemoryManagerPod(podName string, initCtnAttributes, ctnAttributes []memoryManagerCtnAttributes) *v1.Pod {
+	hugepagesMount := false
+	memsetCmd := fmt.Sprintf("grep Mems_allowed_list /proc/self/status | cut -f2")
+	memsetSleepCmd := memsetCmd + "&& sleep 1d"
+	var containers, initContainers []v1.Container
+	if len(initCtnAttributes) > 0 {
+		initContainers, _ = makeMemoryManagerContainers(memsetCmd, initCtnAttributes)
+	}
+	containers, hugepagesMount = makeMemoryManagerContainers(memsetSleepCmd, ctnAttributes)
+
 	pod := &v1.Pod{
 		ObjectMeta: metav1.ObjectMeta{
 			Name: podName,
 		},
 		Spec: v1.PodSpec{
-			RestartPolicy: v1.RestartPolicyNever,
-			Containers:    containers,
+			RestartPolicy:  v1.RestartPolicyNever,
+			Containers:     containers,
+			InitContainers: initContainers,
 		},
 	}
 
@@ -216,13 +230,13 @@ func getAllNUMANodes() []int {
 var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager][NodeAlphaFeature:MemoryManager]", func() {
 	// TODO: add more complex tests that will include interaction between CPUManager, MemoryManager and TopologyManager
 	var (
-		allNUMANodes            []int
-		ctnParams               []memoryManagerCtnAttributes
-		is2MiHugepagesSupported *bool
-		isMultiNUMASupported    *bool
-		kubeParams              *kubeletParams
-		oldCfg                  *kubeletconfig.KubeletConfiguration
-		testPod                 *v1.Pod
+		allNUMANodes             []int
+		ctnParams, initCtnParams []memoryManagerCtnAttributes
+		is2MiHugepagesSupported  *bool
+		isMultiNUMASupported     *bool
+		kubeParams               *kubeletParams
+		oldCfg                   *kubeletconfig.KubeletConfiguration
+		testPod                  *v1.Pod
 	)
 
 	f := framework.NewDefaultFramework("memory-manager-test")
@@ -287,7 +301,7 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager][NodeAlphaFe
 		}
 	}
 
-	testPod = makeMemoryManagerPod(ctnParams[0].ctnName, ctnParams)
+	testPod = makeMemoryManagerPod(ctnParams[0].ctnName, initCtnParams, ctnParams)
 })
 
 ginkgo.JustAfterEach(func() {
@@ -309,27 +323,71 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager][NodeAlphaFe
 		tmpParams := *defaultKubeParams
 		tmpParams.memoryManagerPolicy = staticPolicy
 		kubeParams = &tmpParams
+	})
 
-		// override pod parameters
-		ctnParams = []memoryManagerCtnAttributes{
-			{
-				ctnName: "memory-manager-static",
-				cpus:    "100m",
-				memory:  "128Mi",
-			},
-		}
+	ginkgo.JustAfterEach(func() {
+		// reset containers attributes
+		ctnParams = []memoryManagerCtnAttributes{}
+		initCtnParams = []memoryManagerCtnAttributes{}
 	})
 
-	ginkgo.It("should succeed to start the pod", func() {
-		ginkgo.By("Running the test pod")
-		testPod = f.PodClient().CreateSync(testPod)
+	ginkgo.When("pod has init and app containers", func() {
+		ginkgo.BeforeEach(func() {
+			// override containers parameters
+			ctnParams = []memoryManagerCtnAttributes{
+				{
+					ctnName: "memory-manager-static",
+					cpus:    "100m",
+					memory:  "128Mi",
+				},
+			}
+			// override init container parameters
+			initCtnParams = []memoryManagerCtnAttributes{
+				{
+					ctnName: "init-memory-manager-static",
+					cpus:    "100m",
+					memory:  "128Mi",
+				},
+			}
+		})
 
-		// it makes no sense to verify NUMA pinning when the node has only one NUMA node
-		if !*isMultiNUMASupported {
-			return
-		}
+		ginkgo.It("should succeed to start the pod", func() {
+			ginkgo.By("Running the test pod")
+			testPod = f.PodClient().CreateSync(testPod)
+
+			// it makes no sense to verify NUMA pinning when the node has only one NUMA node
+			if !*isMultiNUMASupported {
+				return
+			}
+
+			verifyMemoryPinning([]int{0})
+		})
+	})
+
+	ginkgo.When("pod has only app containers", func() {
+
+		ginkgo.BeforeEach(func() {
+			// override containers parameters
+			ctnParams = []memoryManagerCtnAttributes{
+				{
+					ctnName: "memory-manager-static",
+					cpus:    "100m",
+					memory:  "128Mi",
+				},
+			}
+		})
+
+		ginkgo.It("should succeed to start the pod", func() {
+			ginkgo.By("Running the test pod")
+			testPod = f.PodClient().CreateSync(testPod)
+
+			// it makes no sense to verify NUMA pinning when the node has only one NUMA node
+			if !*isMultiNUMASupported {
+				return
+			}
-
-		verifyMemoryPinning([]int{0})
+
+			verifyMemoryPinning([]int{0})
+		})
 	})
 })

From 74eeef2a0a3080d1f1d1f0ae8e92a35544feb5d6 Mon Sep 17 00:00:00 2001
From: Artyom Lukianov
Date: Thu, 5 Nov 2020 16:43:49 +0200
Subject: [PATCH 23/31] memory manager: provide additional e2e tests

With the memory manager static policy:
- start multiple guaranteed pods and verify that the pods succeed to start
- start a workload pod on each NUMA node to load the memory, then start a pod
  that requests more memory than any single NUMA node has free; the pod should
  fail to start with an admission error, because no single NUMA node has
  enough free memory to start the pod and each NUMA node is already in use
  for a single NUMA node allocation

The test requires at least two NUMA nodes.

Signed-off-by: Artyom Lukianov
---
 test/e2e_node/BUILD                  |   1 +
 test/e2e_node/memory_manager_test.go | 174 ++++++++++++++++++++++++---
 2 files changed, 159 insertions(+), 16 deletions(-)

diff --git a/test/e2e_node/BUILD b/test/e2e_node/BUILD
index c88d037abbe5..231f1402ee4b 100644
--- a/test/e2e_node/BUILD
+++ b/test/e2e_node/BUILD
@@ -161,6 +161,7 @@ go_test(
         "//pkg/kubelet/cm/cpumanager:go_default_library",
        "//pkg/kubelet/cm/cpumanager/state:go_default_library",
        "//pkg/kubelet/cm/cpuset:go_default_library",
+        "//pkg/kubelet/cm/memorymanager/state:go_default_library",
        "//pkg/kubelet/cm/topologymanager:go_default_library",
        "//pkg/kubelet/container:go_default_library",
        "//pkg/kubelet/events:go_default_library",
diff --git a/test/e2e_node/memory_manager_test.go b/test/e2e_node/memory_manager_test.go
index 3f9716cdded3..fb02b8aed463 100644
--- a/test/e2e_node/memory_manager_test.go
+++ b/test/e2e_node/memory_manager_test.go
@@ -17,7 +17,10 @@ limitations under the License.
 package e2enode
 
 import (
+	"context"
+	"encoding/json"
 	"fmt"
+	"os"
 	"os/exec"
 	"regexp"
 	"sort"
@@ -25,13 +28,12 @@ import (
 	"strings"
 	"time"
 
-	"k8s.io/klog/v2"
-
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
 	"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
+	"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
@@ -97,7 +99,7 @@ func makeMemoryManagerContainers(ctnCmd string, ctnAttributes []memoryManagerCtn
 // makeMemoryManagerPod returns a pod with the provided ctnAttributes.
func makeMemoryManagerPod(podName string, initCtnAttributes, ctnAttributes []memoryManagerCtnAttributes) *v1.Pod { hugepagesMount := false - memsetCmd := fmt.Sprintf("grep Mems_allowed_list /proc/self/status | cut -f2") + memsetCmd := "grep Mems_allowed_list /proc/self/status | cut -f2" memsetSleepCmd := memsetCmd + "&& sleep 1d" var containers, initContainers []v1.Container if len(initCtnAttributes) > 0 { @@ -107,7 +109,7 @@ func makeMemoryManagerPod(podName string, initCtnAttributes, ctnAttributes []mem pod := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Name: podName, + GenerateName: podName, }, Spec: v1.PodSpec{ RestartPolicy: v1.RestartPolicyNever, @@ -137,6 +139,23 @@ func deleteMemoryManagerStateFile() { framework.ExpectNoError(err, "failed to delete the state file") } +func getMemoryManagerState() (*state.MemoryManagerCheckpoint, error) { + if _, err := os.Stat(memoryManagerStateFile); os.IsNotExist(err) { + return nil, fmt.Errorf("the memory manager state file %s does not exist", memoryManagerStateFile) + } + + out, err := exec.Command("/bin/sh", "-c", fmt.Sprintf("cat %s", memoryManagerStateFile)).Output() + if err != nil { + return nil, fmt.Errorf("failed to run command 'cat %s': out: %s, err: %v", memoryManagerStateFile, out, err) + } + + memoryManagerCheckpoint := &state.MemoryManagerCheckpoint{} + if err := json.Unmarshal(out, memoryManagerCheckpoint); err != nil { + return nil, fmt.Errorf("failed to unmarshal memory manager state file: %v", err) + } + return memoryManagerCheckpoint, nil +} + type kubeletParams struct { memoryManagerFeatureGate bool memoryManagerPolicy string @@ -250,10 +269,10 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager][NodeAlphaFe evictionHard: map[string]string{evictionHardMemory: "100Mi"}, } - verifyMemoryPinning := func(numaNodeIDs []int) { + verifyMemoryPinning := func(pod *v1.Pod, numaNodeIDs []int) { ginkgo.By("Verifying the NUMA pinning") - output, err := e2epod.GetPodLogs(f.ClientSet, f.Namespace.Name, testPod.Name, testPod.Spec.Containers[0].Name) + output, err := e2epod.GetPodLogs(f.ClientSet, f.Namespace.Name, pod.Name, pod.Spec.Containers[0].Name) framework.ExpectNoError(err) currentNUMANodeIDs, err := cpuset.Parse(strings.Trim(output, "\n")) @@ -306,12 +325,14 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager][NodeAlphaFe ginkgo.JustAfterEach(func() { // delete the test pod - f.PodClient().DeleteSync(testPod.Name, metav1.DeleteOptions{}, time.Minute) + if testPod.Name != "" { + f.PodClient().DeleteSync(testPod.Name, metav1.DeleteOptions{}, 2*time.Minute) + } // release hugepages - if err := configureHugePages(hugepagesSize2M, 0); err != nil { - klog.Errorf("failed to release hugepages: %v", err) - } + gomega.Eventually(func() error { + return configureHugePages(hugepagesSize2M, 0) + }, 90*time.Second, 15*time.Second).ShouldNot(gomega.HaveOccurred(), "failed to release hugepages") // update the kubelet config with old values updateKubeletConfig(f, oldCfg) @@ -331,7 +352,7 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager][NodeAlphaFe initCtnParams = []memoryManagerCtnAttributes{} }) - ginkgo.When("pod has init and app containers", func() { + ginkgo.When("guaranteed pod has init and app containers", func() { ginkgo.BeforeEach(func() { // override containers parameters ctnParams = []memoryManagerCtnAttributes{ @@ -360,12 +381,11 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager][NodeAlphaFe return } - verifyMemoryPinning([]int{0}) + verifyMemoryPinning(testPod, 
[]int{0})
 		})
 	})
 
-	ginkgo.When("pod has only app containers", func() {
-
+	ginkgo.When("guaranteed pod has only app containers", func() {
 		ginkgo.BeforeEach(func() {
 			// override containers parameters
 			ctnParams = []memoryManagerCtnAttributes{
 				{
@@ -386,7 +406,129 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager][NodeAlphaFe
 				return
 			}
 
-			verifyMemoryPinning([]int{0})
+			verifyMemoryPinning(testPod, []int{0})
 		})
 	})
+
+	ginkgo.When("multiple guaranteed pods started", func() {
+		var testPod2 *v1.Pod
+
+		ginkgo.BeforeEach(func() {
+			// override containers parameters
+			ctnParams = []memoryManagerCtnAttributes{
+				{
+					ctnName: "memory-manager-static",
+					cpus:    "100m",
+					memory:  "128Mi",
+				},
+			}
+		})
+
+		ginkgo.JustBeforeEach(func() {
+			testPod2 = makeMemoryManagerPod("memory-manager-static", initCtnParams, ctnParams)
+		})
+
+		ginkgo.It("should succeed to start all pods", func() {
+			ginkgo.By("Running the test pod and the test pod 2")
+			testPod = f.PodClient().CreateSync(testPod)
+
+			ginkgo.By("Running the test pod 2")
+			testPod2 = f.PodClient().CreateSync(testPod2)
+
+			// it makes no sense to verify NUMA pinning when the node has only one NUMA node
+			if !*isMultiNUMASupported {
+				return
+			}
+
+			verifyMemoryPinning(testPod, []int{0})
+			verifyMemoryPinning(testPod2, []int{0})
+		})
+
+		ginkgo.JustAfterEach(func() {
+			// delete the test pod 2
+			if testPod2.Name != "" {
+				f.PodClient().DeleteSync(testPod2.Name, metav1.DeleteOptions{}, 2*time.Minute)
+			}
+		})
+	})
+
+	// the test requires at least two NUMA nodes
+	// it starts a workload pod on each NUMA node that consumes almost all of the NUMA node's memory, leaving only 256Mi free,
+	// and afterwards starts an additional pod whose memory request cannot be satisfied by the free memory of any single
+	// NUMA node
+	ginkgo.When("guaranteed pod memory request is bigger than free memory on each NUMA node", func() {
+		var workloadPods []*v1.Pod
+
+		ginkgo.BeforeEach(func() {
+			if !*isMultiNUMASupported {
+				ginkgo.Skip("The machine has fewer than two NUMA nodes")
+			}
+
+			ctnParams = []memoryManagerCtnAttributes{
+				{
+					ctnName: "memory-manager-static",
+					cpus:    "100m",
+					memory:  "384Mi",
+				},
+			}
+		})
+
+		ginkgo.JustBeforeEach(func() {
+			stateData, err := getMemoryManagerState()
+			framework.ExpectNoError(err)
+
+			for _, memoryState := range stateData.MachineState {
+				// consume all memory except 256Mi on each NUMA node via workload pods
+				workloadPodMemory := memoryState.MemoryMap[v1.ResourceMemory].Free - 256*1024*1024
+				memoryQuantity := resource.NewQuantity(int64(workloadPodMemory), resource.BinarySI)
+				workloadCtnAttrs := []memoryManagerCtnAttributes{
+					{
+						ctnName: "workload-pod",
+						cpus:    "100m",
+						memory:  memoryQuantity.String(),
+					},
+				}
+				workloadPod := makeMemoryManagerPod(workloadCtnAttrs[0].ctnName, initCtnParams, workloadCtnAttrs)
+
+				workloadPod = f.PodClient().CreateSync(workloadPod)
+				workloadPods = append(workloadPods, workloadPod)
+			}
+		})
+
+		ginkgo.It("should be rejected", func() {
+			ginkgo.By("Creating the pod")
+			testPod = f.PodClient().Create(testPod)
+
+			ginkgo.By("Checking that pod failed to start because of admission error")
+			gomega.Eventually(func() bool {
+				tmpPod, err := f.PodClient().Get(context.TODO(), testPod.Name, metav1.GetOptions{})
+				framework.ExpectNoError(err)
+
+				if tmpPod.Status.Phase != v1.PodFailed {
+					return false
+				}
+
+				if tmpPod.Status.Reason != "UnexpectedAdmissionError" {
+					return false
+				}
+
+				if !strings.Contains(tmpPod.Status.Message, "Pod Allocate failed due to [memorymanager]") {
+					return false
+				}
+
+
return true + }, time.Minute, 5*time.Second).Should( + gomega.Equal(true), + "the pod succeeded to start, when it should fail with the admission error", + ) + }) + + ginkgo.JustAfterEach(func() { + for _, workloadPod := range workloadPods { + if workloadPod.Name != "" { + f.PodClient().DeleteSync(workloadPod.Name, metav1.DeleteOptions{}, 2*time.Minute) + } + } }) }) }) @@ -415,7 +557,7 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager][NodeAlphaFe return } - verifyMemoryPinning(allNUMANodes) + verifyMemoryPinning(testPod, allNUMANodes) }) }) }) From ff2a110920834d2b866f54421414029381cc2c56 Mon Sep 17 00:00:00 2001 From: Artyom Lukianov Date: Thu, 12 Nov 2020 13:58:12 +0200 Subject: [PATCH 24/31] memory manager: provide the new type to contain resources for each NUMA node Signed-off-by: Artyom Lukianov --- cmd/kubelet/app/server.go | 4 +- pkg/kubelet/cm/BUILD | 3 +- pkg/kubelet/cm/container_manager.go | 4 +- pkg/kubelet/cm/memorymanager/BUILD | 2 + .../cm/memorymanager/memory_manager.go | 11 ++--- .../cm/memorymanager/memory_manager_test.go | 45 ++++++++++--------- pkg/kubelet/types/BUILD | 1 + pkg/kubelet/types/types.go | 4 ++ 8 files changed, 41 insertions(+), 33 deletions(-) diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index 127dfc84f8b6..42f0167ea1dc 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -1305,7 +1305,7 @@ func parseResourceList(m map[string]string) (v1.ResourceList, error) { return rl, nil } -func parseReservedMemoryConfig(config []map[string]string) (map[int]map[v1.ResourceName]resource.Quantity, error) { +func parseReservedMemoryConfig(config []map[string]string) (kubetypes.NUMANodeResources, error) { if len(config) == 0 { return nil, nil } @@ -1327,7 +1327,7 @@ func parseReservedMemoryConfig(config []map[string]string) (map[int]map[v1.Resou } } - parsed := make(map[int]map[v1.ResourceName]resource.Quantity, len(config)) + parsed := make(kubetypes.NUMANodeResources, len(config)) for _, m := range config { idxInString, _ := m[indexKey] idx, err := strconv.Atoi(idxInString) diff --git a/pkg/kubelet/cm/BUILD b/pkg/kubelet/cm/BUILD index 8c6a70c81077..4ac0d10c67bc 100644 --- a/pkg/kubelet/cm/BUILD +++ b/pkg/kubelet/cm/BUILD @@ -41,6 +41,7 @@ go_library( "//pkg/kubelet/lifecycle:go_default_library", "//pkg/kubelet/pluginmanager/cache:go_default_library", "//pkg/kubelet/status:go_default_library", + "//pkg/kubelet/types:go_default_library", "//pkg/scheduler/framework:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", @@ -69,7 +70,6 @@ go_library( "//pkg/kubelet/metrics:go_default_library", "//pkg/kubelet/qos:go_default_library", "//pkg/kubelet/stats/pidlimit:go_default_library", - "//pkg/kubelet/types:go_default_library", "//pkg/util/oom:go_default_library", "//pkg/util/procfs:go_default_library", "//pkg/util/sysctl:go_default_library", @@ -130,7 +130,6 @@ go_library( "//pkg/kubelet/metrics:go_default_library", "//pkg/kubelet/qos:go_default_library", "//pkg/kubelet/stats/pidlimit:go_default_library", - "//pkg/kubelet/types:go_default_library", "//pkg/util/oom:go_default_library", "//pkg/util/procfs:go_default_library", "//pkg/util/sysctl:go_default_library", diff --git a/pkg/kubelet/cm/container_manager.go b/pkg/kubelet/cm/container_manager.go index fcebd79fd758..d72c16630eec 100644 --- a/pkg/kubelet/cm/container_manager.go +++ b/pkg/kubelet/cm/container_manager.go @@ -19,7 +19,6 @@ package cm import ( "time" - 
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/util/sets" // TODO: Migrate kubelet to either use its own internal objects or client library. v1 "k8s.io/api/core/v1" @@ -32,6 +31,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache" "k8s.io/kubernetes/pkg/kubelet/status" + kubetypes "k8s.io/kubernetes/pkg/kubelet/types" schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" "fmt" @@ -137,7 +137,7 @@ type NodeConfig struct { ExperimentalTopologyManagerScope string ExperimentalCPUManagerReconcilePeriod time.Duration ExperimentalMemoryManagerPolicy string - ExperimentalMemoryManagerReservedMemory map[int]map[v1.ResourceName]resource.Quantity + ExperimentalMemoryManagerReservedMemory kubetypes.NUMANodeResources ExperimentalPodPidsLimit int64 EnforceCPULimits bool CPUCFSQuotaPeriod time.Duration diff --git a/pkg/kubelet/cm/memorymanager/BUILD b/pkg/kubelet/cm/memorymanager/BUILD index 2ff5be7a9a21..200bab3364a4 100644 --- a/pkg/kubelet/cm/memorymanager/BUILD +++ b/pkg/kubelet/cm/memorymanager/BUILD @@ -20,6 +20,7 @@ go_library( "//pkg/kubelet/cm/topologymanager/bitmask:go_default_library", "//pkg/kubelet/config:go_default_library", "//pkg/kubelet/status:go_default_library", + "//pkg/kubelet/types:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", "//staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2:go_default_library", @@ -40,6 +41,7 @@ go_test( "//pkg/kubelet/cm/memorymanager/state:go_default_library", "//pkg/kubelet/cm/topologymanager:go_default_library", "//pkg/kubelet/cm/topologymanager/bitmask:go_default_library", + "//pkg/kubelet/types:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", diff --git a/pkg/kubelet/cm/memorymanager/memory_manager.go b/pkg/kubelet/cm/memorymanager/memory_manager.go index 058f3ad922e4..4909db4383b8 100644 --- a/pkg/kubelet/cm/memorymanager/memory_manager.go +++ b/pkg/kubelet/cm/memorymanager/memory_manager.go @@ -34,6 +34,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/config" "k8s.io/kubernetes/pkg/kubelet/status" + kubetypes "k8s.io/kubernetes/pkg/kubelet/types" ) // memoryManagerStateFileName is the file name where memory manager stores its state @@ -118,7 +119,7 @@ type manager struct { var _ Manager = &manager{} // NewManager returns new instance of the memory manager -func NewManager(policyName string, machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, reservedMemory map[int]map[v1.ResourceName]resource.Quantity, stateFileDirectory string, affinity topologymanager.Store) (Manager, error) { +func NewManager(policyName string, machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, reservedMemory kubetypes.NUMANodeResources, stateFileDirectory string, affinity topologymanager.Store) (Manager, error) { var policy Policy switch policyType(policyName) { @@ -320,7 +321,7 @@ func (m *manager) policyRemoveContainerByRef(podUID string, containerName string return err } -func getTotalMemoryTypeReserved(machineInfo *cadvisorapi.MachineInfo, reservedMemory map[int]map[v1.ResourceName]resource.Quantity) map[v1.ResourceName]resource.Quantity { +func getTotalMemoryTypeReserved(machineInfo *cadvisorapi.MachineInfo, 
reservedMemory kubetypes.NUMANodeResources) map[v1.ResourceName]resource.Quantity { totalMemoryType := map[v1.ResourceName]resource.Quantity{} numaNodes := map[int]bool{} @@ -345,7 +346,7 @@ func getTotalMemoryTypeReserved(machineInfo *cadvisorapi.MachineInfo, reservedMe return totalMemoryType } -func validateReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, reservedMemory map[int]map[v1.ResourceName]resource.Quantity) error { +func validateReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, reservedMemory kubetypes.NUMANodeResources) error { totalMemoryType := getTotalMemoryTypeReserved(machineInfo, reservedMemory) commonMemoryTypeSet := make(map[v1.ResourceName]bool) @@ -381,7 +382,7 @@ func validateReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatabl return nil } -func convertReserved(machineInfo *cadvisorapi.MachineInfo, reservedMemory map[int]map[v1.ResourceName]resource.Quantity) (systemReservedMemory, error) { +func convertReserved(machineInfo *cadvisorapi.MachineInfo, reservedMemory kubetypes.NUMANodeResources) (systemReservedMemory, error) { preReservedMemoryConverted := make(map[int]map[v1.ResourceName]uint64) for _, node := range machineInfo.Topology { preReservedMemoryConverted[node.Id] = make(map[v1.ResourceName]uint64) @@ -401,7 +402,7 @@ func convertReserved(machineInfo *cadvisorapi.MachineInfo, reservedMemory map[in return preReservedMemoryConverted, nil } -func getSystemReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, preReservedMemory map[int]map[v1.ResourceName]resource.Quantity) (systemReservedMemory, error) { +func getSystemReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, preReservedMemory kubetypes.NUMANodeResources) (systemReservedMemory, error) { if err := validateReservedMemory(machineInfo, nodeAllocatableReservation, preReservedMemory); err != nil { return nil, err } diff --git a/pkg/kubelet/cm/memorymanager/memory_manager_test.go b/pkg/kubelet/cm/memorymanager/memory_manager_test.go index c503812046d1..e336b9297190 100644 --- a/pkg/kubelet/cm/memorymanager/memory_manager_test.go +++ b/pkg/kubelet/cm/memorymanager/memory_manager_test.go @@ -34,6 +34,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/containermap" "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" + kubetypes "k8s.io/kubernetes/pkg/kubelet/types" ) const ( @@ -58,7 +59,7 @@ type testMemoryManager struct { nodeAllocatableReservation v1.ResourceList policyName policyType affinity topologymanager.Store - systemReservedMemory map[int]map[v1.ResourceName]resource.Quantity + systemReservedMemory kubetypes.NUMANodeResources expectedHints map[string][]topologymanager.TopologyHint expectedReserved systemReservedMemory reserved systemReservedMemory @@ -157,21 +158,21 @@ func TestValidateReservedMemory(t *testing.T) { description string nodeAllocatableReservation v1.ResourceList machineInfo *cadvisorapi.MachineInfo - systemReservedMemory map[int]map[v1.ResourceName]resource.Quantity + systemReservedMemory kubetypes.NUMANodeResources expectedError string }{ { "Node Allocatable not set, reserved not set", v1.ResourceList{}, machineInfo, - map[int]map[v1.ResourceName]resource.Quantity{}, + kubetypes.NUMANodeResources{}, "", }, { "Node Allocatable set to zero, reserved set to zero", v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(0, resource.DecimalSI)}, 
machineInfo, - map[int]map[v1.ResourceName]resource.Quantity{ + kubetypes.NUMANodeResources{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(0, resource.DecimalSI)}, }, "", @@ -180,7 +181,7 @@ func TestValidateReservedMemory(t *testing.T) { "Node Allocatable not set (equal zero), reserved set", v1.ResourceList{}, machineInfo, - map[int]map[v1.ResourceName]resource.Quantity{ + kubetypes.NUMANodeResources{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI)}, }, fmt.Sprintf(msgNotEqual, v1.ResourceMemory), @@ -189,14 +190,14 @@ func TestValidateReservedMemory(t *testing.T) { "Node Allocatable set, reserved not set", v1.ResourceList{hugepages2M: *resource.NewQuantity(5, resource.DecimalSI)}, machineInfo, - map[int]map[v1.ResourceName]resource.Quantity{}, + kubetypes.NUMANodeResources{}, fmt.Sprintf(msgNotEqual, hugepages2M), }, { "Reserved not equal to Node Allocatable", v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI)}, machineInfo, - map[int]map[v1.ResourceName]resource.Quantity{ + kubetypes.NUMANodeResources{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI)}, }, fmt.Sprintf(msgNotEqual, v1.ResourceMemory), @@ -205,7 +206,7 @@ func TestValidateReservedMemory(t *testing.T) { "Reserved contains the NUMA node that does not exist under the machine", v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(17, resource.DecimalSI)}, machineInfo, - map[int]map[v1.ResourceName]resource.Quantity{ + kubetypes.NUMANodeResources{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI)}, 2: nodeResources{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI)}, }, @@ -217,7 +218,7 @@ func TestValidateReservedMemory(t *testing.T) { hugepages2M: *resource.NewQuantity(77, resource.DecimalSI), hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, machineInfo, - map[int]map[v1.ResourceName]resource.Quantity{ + kubetypes.NUMANodeResources{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, @@ -232,7 +233,7 @@ func TestValidateReservedMemory(t *testing.T) { hugepages2M: *resource.NewQuantity(14, resource.DecimalSI), hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, machineInfo, - map[int]map[v1.ResourceName]resource.Quantity{ + kubetypes.NUMANodeResources{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, @@ -265,13 +266,13 @@ func TestConvertPreReserved(t *testing.T) { testCases := []struct { description string - systemReserved map[int]map[v1.ResourceName]resource.Quantity + systemReserved kubetypes.NUMANodeResources systemReservedExpected systemReservedMemory expectedError string }{ { "Empty", - map[int]map[v1.ResourceName]resource.Quantity{}, + kubetypes.NUMANodeResources{}, systemReservedMemory{ 0: map[v1.ResourceName]uint64{}, 1: map[v1.ResourceName]uint64{}, @@ -280,7 +281,7 @@ func TestConvertPreReserved(t *testing.T) { }, { "Single NUMA node is reserved", - map[int]map[v1.ResourceName]resource.Quantity{ + kubetypes.NUMANodeResources{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, @@ -297,7 
+298,7 @@ func TestConvertPreReserved(t *testing.T) { }, { "Both NUMA nodes are reserved", - map[int]map[v1.ResourceName]resource.Quantity{ + kubetypes.NUMANodeResources{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, @@ -335,7 +336,7 @@ func TestGetSystemReservedMemory(t *testing.T) { { description: "Should return empty map when reservation is not done", nodeAllocatableReservation: v1.ResourceList{}, - systemReservedMemory: map[int]map[v1.ResourceName]resource.Quantity{}, + systemReservedMemory: kubetypes.NUMANodeResources{}, expectedReserved: systemReservedMemory{ 0: {}, 1: {}, @@ -346,7 +347,7 @@ func TestGetSystemReservedMemory(t *testing.T) { { description: "Should return error when Allocatable reservation is not equal pre reserved memory", nodeAllocatableReservation: v1.ResourceList{}, - systemReservedMemory: map[int]map[v1.ResourceName]resource.Quantity{ + systemReservedMemory: kubetypes.NUMANodeResources{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI)}, }, expectedReserved: nil, @@ -356,7 +357,7 @@ func TestGetSystemReservedMemory(t *testing.T) { { description: "Reserved should be equal to systemReservedMemory", nodeAllocatableReservation: v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(2*gb, resource.BinarySI)}, - systemReservedMemory: map[int]map[v1.ResourceName]resource.Quantity{ + systemReservedMemory: kubetypes.NUMANodeResources{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI)}, 1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI)}, }, @@ -2062,7 +2063,7 @@ func TestNewManager(t *testing.T) { policyName: policyTypeStatic, machineInfo: machineInfo, nodeAllocatableReservation: v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(2*gb, resource.BinarySI)}, - systemReservedMemory: map[int]map[v1.ResourceName]resource.Quantity{ + systemReservedMemory: kubetypes.NUMANodeResources{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI)}, 1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI)}, }, @@ -2075,7 +2076,7 @@ func TestNewManager(t *testing.T) { policyName: policyTypeStatic, machineInfo: machineInfo, nodeAllocatableReservation: v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(2*gb, resource.BinarySI)}, - systemReservedMemory: map[int]map[v1.ResourceName]resource.Quantity{ + systemReservedMemory: kubetypes.NUMANodeResources{ 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI)}, 1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(2*gb, resource.BinarySI)}, }, @@ -2088,7 +2089,7 @@ func TestNewManager(t *testing.T) { policyName: policyTypeStatic, machineInfo: machineInfo, nodeAllocatableReservation: v1.ResourceList{}, - systemReservedMemory: map[int]map[v1.ResourceName]resource.Quantity{}, + systemReservedMemory: kubetypes.NUMANodeResources{}, affinity: topologymanager.NewFakeManager(), expectedError: fmt.Errorf("[memorymanager] you should specify the system reserved memory"), expectedReserved: expectedReserved, @@ -2098,7 +2099,7 @@ func TestNewManager(t *testing.T) { policyName: "fake", machineInfo: machineInfo, nodeAllocatableReservation: v1.ResourceList{}, - systemReservedMemory: map[int]map[v1.ResourceName]resource.Quantity{}, + systemReservedMemory: kubetypes.NUMANodeResources{}, affinity: 
topologymanager.NewFakeManager(), expectedError: fmt.Errorf("unknown policy: \"fake\""), expectedReserved: expectedReserved, @@ -2108,7 +2109,7 @@ func TestNewManager(t *testing.T) { policyName: policyTypeNone, machineInfo: machineInfo, nodeAllocatableReservation: v1.ResourceList{}, - systemReservedMemory: map[int]map[v1.ResourceName]resource.Quantity{}, + systemReservedMemory: kubetypes.NUMANodeResources{}, affinity: topologymanager.NewFakeManager(), expectedError: nil, expectedReserved: expectedReserved, diff --git a/pkg/kubelet/types/BUILD b/pkg/kubelet/types/BUILD index 2f4e1abe1918..06357196f2b8 100644 --- a/pkg/kubelet/types/BUILD +++ b/pkg/kubelet/types/BUILD @@ -20,6 +20,7 @@ go_library( deps = [ "//pkg/apis/scheduling:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/types:go_default_library", ], diff --git a/pkg/kubelet/types/types.go b/pkg/kubelet/types/types.go index 2ed7f1c991e5..8dc13bcc7913 100644 --- a/pkg/kubelet/types/types.go +++ b/pkg/kubelet/types/types.go @@ -21,6 +21,7 @@ import ( "time" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/types" ) @@ -113,3 +114,6 @@ type ResolvedPodUID types.UID // MirrorPodUID is a pod UID for a mirror pod. type MirrorPodUID types.UID + +// NUMANodeResources is a set of (resource name, quantity) pairs for each NUMA node. +type NUMANodeResources map[int]map[v1.ResourceName]resource.Quantity From d0089db2ecffef78c3f394ecaa7152c9380e1bb1 Mon Sep 17 00:00:00 2001 From: Artyom Lukianov Date: Thu, 12 Nov 2020 15:54:30 +0200 Subject: [PATCH 25/31] memory manager: remove unused variable under stateCheckpoint --- pkg/kubelet/cm/memorymanager/memory_manager.go | 4 ++-- pkg/kubelet/cm/memorymanager/state/BUILD | 2 -- .../cm/memorymanager/state/state_checkpoint.go | 5 +---- .../memorymanager/state/state_checkpoint_test.go | 16 +++++----------- 4 files changed, 8 insertions(+), 19 deletions(-) diff --git a/pkg/kubelet/cm/memorymanager/memory_manager.go b/pkg/kubelet/cm/memorymanager/memory_manager.go index 4909db4383b8..4929eaa2699d 100644 --- a/pkg/kubelet/cm/memorymanager/memory_manager.go +++ b/pkg/kubelet/cm/memorymanager/memory_manager.go @@ -97,7 +97,7 @@ type manager struct { containerRuntime runtimeService // activePods is a method for listing active pods on the node - // so all the containers can be updated in the reconciliation loop. + // so all the containers can be updated during call to the removeStaleState. 
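+	// (editor's note: removeStaleState uses this list to purge memory assignments of pods that are no longer active)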
activePods ActivePodsFunc // podStatusProvider provides a method for obtaining pod statuses @@ -159,7 +159,7 @@ func (m *manager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesRe m.containerRuntime = containerRuntime m.containerMap = initialContainers - stateImpl, err := state.NewCheckpointState(m.stateFileDirectory, memoryManagerStateFileName, m.policy.Name(), m.containerMap) + stateImpl, err := state.NewCheckpointState(m.stateFileDirectory, memoryManagerStateFileName, m.policy.Name()) if err != nil { klog.Errorf("[memorymanager] could not initialize checkpoint manager: %v, please drain node and remove policy state file", err) return err diff --git a/pkg/kubelet/cm/memorymanager/state/BUILD b/pkg/kubelet/cm/memorymanager/state/BUILD index a0fcfa9d2d61..b7719ee4651a 100644 --- a/pkg/kubelet/cm/memorymanager/state/BUILD +++ b/pkg/kubelet/cm/memorymanager/state/BUILD @@ -14,7 +14,6 @@ go_library( "//pkg/kubelet/checkpointmanager:go_default_library", "//pkg/kubelet/checkpointmanager/checksum:go_default_library", "//pkg/kubelet/checkpointmanager/errors:go_default_library", - "//pkg/kubelet/cm/containermap:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", "//vendor/k8s.io/klog/v2:go_default_library", ], @@ -26,7 +25,6 @@ go_test( embed = [":go_default_library"], deps = [ "//pkg/kubelet/checkpointmanager:go_default_library", - "//pkg/kubelet/cm/containermap:go_default_library", "//pkg/kubelet/cm/cpumanager/state/testing:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", "//vendor/github.com/stretchr/testify/assert:go_default_library", diff --git a/pkg/kubelet/cm/memorymanager/state/state_checkpoint.go b/pkg/kubelet/cm/memorymanager/state/state_checkpoint.go index 303501f7b15e..03333badc666 100644 --- a/pkg/kubelet/cm/memorymanager/state/state_checkpoint.go +++ b/pkg/kubelet/cm/memorymanager/state/state_checkpoint.go @@ -24,7 +24,6 @@ import ( "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors" - "k8s.io/kubernetes/pkg/kubelet/cm/containermap" ) var _ State = &stateCheckpoint{} @@ -35,11 +34,10 @@ type stateCheckpoint struct { policyName string checkpointManager checkpointmanager.CheckpointManager checkpointName string - initialContainers containermap.ContainerMap } // NewCheckpointState creates new State for keeping track of memory/pod assignment with checkpoint backend -func NewCheckpointState(stateDir, checkpointName, policyName string, initialContainers containermap.ContainerMap) (State, error) { +func NewCheckpointState(stateDir, checkpointName, policyName string) (State, error) { checkpointManager, err := checkpointmanager.NewCheckpointManager(stateDir) if err != nil { return nil, fmt.Errorf("failed to initialize checkpoint manager: %v", err) @@ -49,7 +47,6 @@ func NewCheckpointState(stateDir, checkpointName, policyName string, initialCont policyName: policyName, checkpointManager: checkpointManager, checkpointName: checkpointName, - initialContainers: initialContainers, } if err := stateCheckpoint.restoreState(); err != nil { diff --git a/pkg/kubelet/cm/memorymanager/state/state_checkpoint_test.go b/pkg/kubelet/cm/memorymanager/state/state_checkpoint_test.go index a72918719890..d37475dcca26 100644 --- a/pkg/kubelet/cm/memorymanager/state/state_checkpoint_test.go +++ b/pkg/kubelet/cm/memorymanager/state/state_checkpoint_test.go @@ -26,7 +26,6 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" - 
"k8s.io/kubernetes/pkg/kubelet/cm/containermap" testutil "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state/testing" ) @@ -47,14 +46,12 @@ func TestCheckpointStateRestore(t *testing.T) { testCases := []struct { description string checkpointContent string - initialContainers containermap.ContainerMap expectedError string expectedState *stateMemory }{ { "Restore non-existing checkpoint", "", - containermap.ContainerMap{}, "", &stateMemory{}, }, @@ -66,7 +63,6 @@ func TestCheckpointStateRestore(t *testing.T) { "entries":{"pod":{"container1":[{"numaAffinity":[0],"type":"memory","size":512}]}}, "checksum": 4215593881 }`, - containermap.ContainerMap{}, "", &stateMemory{ assignments: ContainerMemoryAssignments{ @@ -103,14 +99,12 @@ func TestCheckpointStateRestore(t *testing.T) { "entries":{"pod":{"container1":[{"affinity":[0],"type":"memory","size":512}]}}, "checksum": 101010 }`, - containermap.ContainerMap{}, "checkpoint is corrupted", &stateMemory{}, }, { "Restore checkpoint with invalid JSON", `{`, - containermap.ContainerMap{}, "unexpected end of JSON input", &stateMemory{}, }, @@ -138,7 +132,7 @@ func TestCheckpointStateRestore(t *testing.T) { assert.NoError(t, cpm.CreateCheckpoint(testingCheckpoint, checkpoint), "could not create testing checkpoint") } - restoredState, err := NewCheckpointState(testingDir, testingCheckpoint, "static", tc.initialContainers) + restoredState, err := NewCheckpointState(testingDir, testingCheckpoint, "static") if strings.TrimSpace(tc.expectedError) != "" { assert.Error(t, err) assert.Contains(t, err.Error(), "could not restore state from checkpoint: "+tc.expectedError) @@ -191,7 +185,7 @@ func TestCheckpointStateStore(t *testing.T) { assert.NoError(t, cpm.RemoveCheckpoint(testingCheckpoint), "could not remove testing checkpoint") - cs1, err := NewCheckpointState(testingDir, testingCheckpoint, "static", nil) + cs1, err := NewCheckpointState(testingDir, testingCheckpoint, "static") assert.NoError(t, err, "could not create testing checkpointState instance") // set values of cs1 instance so they are stored in checkpoint and can be read by cs2 @@ -199,7 +193,7 @@ func TestCheckpointStateStore(t *testing.T) { cs1.SetMemoryAssignments(expectedState.assignments) // restore checkpoint with previously stored values - cs2, err := NewCheckpointState(testingDir, testingCheckpoint, "static", nil) + cs2, err := NewCheckpointState(testingDir, testingCheckpoint, "static") assert.NoError(t, err, "could not create testing checkpointState instance") assertStateEqual(t, cs2, expectedState) @@ -313,7 +307,7 @@ func TestCheckpointStateHelpers(t *testing.T) { // ensure there is no previous checkpoint assert.NoError(t, cpm.RemoveCheckpoint(testingCheckpoint), "could not remove testing checkpoint") - state, err := NewCheckpointState(testingDir, testingCheckpoint, "static", nil) + state, err := NewCheckpointState(testingDir, testingCheckpoint, "static") assert.NoError(t, err, "could not create testing checkpoint manager") state.SetMachineState(tc.machineState) @@ -376,7 +370,7 @@ func TestCheckpointStateClear(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - state, err := NewCheckpointState(testingDir, testingCheckpoint, "static", nil) + state, err := NewCheckpointState(testingDir, testingCheckpoint, "static") assert.NoError(t, err, "could not create testing checkpoint manager") state.SetMachineState(tc.machineState) From 0fa5dd553298ee0288716a034bb8788754ae103c Mon Sep 17 00:00:00 2001 From: Artyom Lukianov Date: Thu, 12 Nov 2020 17:15:49 +0200 
Subject: [PATCH 26/31] memory manager: move the fakeTopologyManagerWithHint Move the fakeTopologyManagerWithHint and all related methods from the topology manager package to the memory manager static policy unittests. Signed-off-by: Artyom Lukianov --- pkg/kubelet/cm/memorymanager/BUILD | 2 + .../cm/memorymanager/policy_static_test.go | 41 ++++++++++++++++++- .../topologymanager/fake_topology_manager.go | 20 +-------- 3 files changed, 43 insertions(+), 20 deletions(-) diff --git a/pkg/kubelet/cm/memorymanager/BUILD b/pkg/kubelet/cm/memorymanager/BUILD index 200bab3364a4..4d0552e2a826 100644 --- a/pkg/kubelet/cm/memorymanager/BUILD +++ b/pkg/kubelet/cm/memorymanager/BUILD @@ -41,7 +41,9 @@ go_test( "//pkg/kubelet/cm/memorymanager/state:go_default_library", "//pkg/kubelet/cm/topologymanager:go_default_library", "//pkg/kubelet/cm/topologymanager/bitmask:go_default_library", + "//pkg/kubelet/lifecycle:go_default_library", "//pkg/kubelet/types:go_default_library", + "//pkg/kubelet/util/format:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", diff --git a/pkg/kubelet/cm/memorymanager/policy_static_test.go b/pkg/kubelet/cm/memorymanager/policy_static_test.go index 612e2417de48..bf2a66fca2b0 100644 --- a/pkg/kubelet/cm/memorymanager/policy_static_test.go +++ b/pkg/kubelet/cm/memorymanager/policy_static_test.go @@ -29,6 +29,8 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" + "k8s.io/kubernetes/pkg/kubelet/util/format" ) const ( @@ -65,6 +67,43 @@ var ( } ) +type fakeTopologyManagerWithHint struct { + hint *topologymanager.TopologyHint +} + +// NewFakeTopologyManagerWithHint returns an instance of fake topology manager with specified topology hints +func NewFakeTopologyManagerWithHint(hint *topologymanager.TopologyHint) topologymanager.Manager { + return &fakeTopologyManagerWithHint{ + hint: hint, + } +} + +func (m *fakeTopologyManagerWithHint) AddHintProvider(h topologymanager.HintProvider) { + klog.Infof("[fake topologymanager] AddHintProvider HintProvider: %v", h) +} + +func (m *fakeTopologyManagerWithHint) AddContainer(pod *v1.Pod, containerID string) error { + klog.Infof("[fake topologymanager] AddContainer pod: %v container id: %v", format.Pod(pod), containerID) + return nil +} + +func (m *fakeTopologyManagerWithHint) RemoveContainer(containerID string) error { + klog.Infof("[fake topologymanager] RemoveContainer container id: %v", containerID) + return nil +} + +func (m *fakeTopologyManagerWithHint) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult { + klog.Infof("[fake topologymanager] Topology Admit Handler") + return lifecycle.PodAdmitResult{ + Admit: true, + } +} + +func (m *fakeTopologyManagerWithHint) GetAffinity(podUID string, containerName string) topologymanager.TopologyHint { + klog.Infof("[fake topologymanager] GetAffinity podUID: %v container name: %v", podUID, containerName) + return *m.hint +} + func areMemoryBlocksEqual(mb1, mb2 []state.Block) bool { if len(mb1) != len(mb2) { return false @@ -132,7 +171,7 @@ type testStaticPolicy struct { func initTests(testCase *testStaticPolicy, hint *topologymanager.TopologyHint) (Policy, state.State, error) { manager := topologymanager.NewFakeManager() if hint != nil { - manager = 
topologymanager.NewFakeManagerWithHint(hint) + manager = NewFakeTopologyManagerWithHint(hint) } p, err := NewPolicyStatic(testCase.machineInfo, testCase.systemReserved, manager) diff --git a/pkg/kubelet/cm/topologymanager/fake_topology_manager.go b/pkg/kubelet/cm/topologymanager/fake_topology_manager.go index f843f24afe6b..a21e50c555a8 100644 --- a/pkg/kubelet/cm/topologymanager/fake_topology_manager.go +++ b/pkg/kubelet/cm/topologymanager/fake_topology_manager.go @@ -17,7 +17,7 @@ limitations under the License. package topologymanager import ( - v1 "k8s.io/api/core/v1" + "k8s.io/api/core/v1" "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/util/format" @@ -56,21 +56,3 @@ func (m *fakeManager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAd Admit: true, } } - -type fakeManagerWithHint struct { - fakeManager - hint *TopologyHint -} - -// NewFakeManagerWithHint returns an instance of FakeManager with specified topology hints -func NewFakeManagerWithHint(hint *TopologyHint) Manager { - return &fakeManagerWithHint{ - fakeManager: fakeManager{}, - hint: hint, - } -} - -func (m *fakeManagerWithHint) GetAffinity(podUID string, containerName string) TopologyHint { - klog.Infof("[fake topologymanager] GetAffinity podUID: %v container name: %v", podUID, containerName) - return *m.hint -} From b7cfc40debe424837423052ba67f2be9ea633809 Mon Sep 17 00:00:00 2001 From: Artyom Lukianov Date: Wed, 18 Nov 2020 00:15:35 +0200 Subject: [PATCH 27/31] memory manager: update kubelet config API Update kubelet config API to use slice of MemoryReservation structs instead of slice of maps. Signed-off-by: Artyom Lukianov --- pkg/kubelet/apis/config/helpers_test.go | 9 ++++- pkg/kubelet/apis/config/types.go | 26 ++++++++++--- pkg/kubelet/apis/config/v1beta1/BUILD | 2 + .../config/v1beta1/zz_generated.conversion.go | 37 ++++++++++++++++++- .../config/v1beta1/zz_generated.defaults.go | 5 +++ .../apis/config/zz_generated.deepcopy.go | 34 +++++++++++++---- .../k8s.io/kubelet/config/v1beta1/types.go | 26 ++++++++++--- .../config/v1beta1/zz_generated.deepcopy.go | 34 +++++++++++++---- 8 files changed, 142 insertions(+), 31 deletions(-) diff --git a/pkg/kubelet/apis/config/helpers_test.go b/pkg/kubelet/apis/config/helpers_test.go index f0787e65889a..b1782b6fb521 100644 --- a/pkg/kubelet/apis/config/helpers_test.go +++ b/pkg/kubelet/apis/config/helpers_test.go @@ -221,7 +221,14 @@ var ( "ReadOnlyPort", "RegistryBurst", "RegistryPullQPS", - "ReservedMemory[*][*]", + "ReservedMemory[*].Limits[*].Format", + "ReservedMemory[*].Limits[*].d.Dec.scale", + "ReservedMemory[*].Limits[*].d.Dec.unscaled.abs[*]", + "ReservedMemory[*].Limits[*].d.Dec.unscaled.neg", + "ReservedMemory[*].Limits[*].i.scale", + "ReservedMemory[*].Limits[*].i.value", + "ReservedMemory[*].Limits[*].s", + "ReservedMemory[*].NumaNode", "ReservedSystemCPUs", "RuntimeRequestTimeout.Duration", "RunOnce", diff --git a/pkg/kubelet/apis/config/types.go b/pkg/kubelet/apis/config/types.go index 13a5fbbd280e..1dddcf3bd7bc 100644 --- a/pkg/kubelet/apis/config/types.go +++ b/pkg/kubelet/apis/config/types.go @@ -385,12 +385,20 @@ type KubeletConfiguration struct { // Defaults to 10 seconds, requires GracefulNodeShutdown feature gate to be enabled. // For example, if ShutdownGracePeriod=30s, and ShutdownGracePeriodCriticalPods=10s, during a node shutdown the first 20 seconds would be reserved for gracefully terminating normal pods, and the last 10 seconds would be reserved for terminating critical pods. 
 	ShutdownGracePeriodCriticalPods metav1.Duration
-	// A comma separated list of bracket-enclosed configurations for memory manager.
-	// Each configuration describes pre-reserved memory for the particular memory type on a specific NUMA node.
-	// The Memory Manager validates whether total amount of pre-reserved memory is identical to reserved-memory by the Node Allocatable feature.
-	// The format is {numa-node=integer, memory-type=string, limit=string}
-	// (e.g. {numa-node=0, type=memory, limit=1Gi}, {numa-node=1, type=memory, limit=1Gi})
-	ReservedMemory []map[string]string
+	// ReservedMemory specifies a comma-separated list of memory reservations for NUMA nodes.
+	// The parameter makes sense only in the context of the memory manager feature. The memory manager will not allocate reserved memory for container workloads.
+	// For example, if you have NUMA node 0 with 10Gi of memory and ReservedMemory is specified to reserve 1Gi of memory at NUMA node 0,
+	// the memory manager will assume that only 9Gi is available for allocation.
+	// You can specify reservations for multiple NUMA nodes and for multiple memory types.
+	// You can omit this parameter entirely, but you should be aware that the amount of reserved memory from all NUMA nodes
+	// should be equal to the amount of memory specified by the node allocatable features (https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/#node-allocatable).
+	// If at least one node allocatable parameter has a non-zero value, you will need to specify at least one NUMA node.
+	// Also, avoid specifying:
+	// 1. Duplicates, i.e. the same NUMA node and memory type, but with a different value.
+	// 2. Zero limits for any memory type.
+	// 3. NUMA node IDs that do not exist on the machine.
+	// 4. Memory types other than memory and hugepages-<size>.
	ReservedMemory []MemoryReservation
 }
 
 // KubeletAuthorizationMode denotes the authorization mode for the kubelet
@@ -544,3 +552,9 @@ type ExecEnvVar struct {
 	Name  string
 	Value string
 }
+
+// MemoryReservation specifies the memory reservation of different types for each NUMA node
+type MemoryReservation struct {
+	NumaNode int32
+	Limits   v1.ResourceList
+}
diff --git a/pkg/kubelet/apis/config/v1beta1/BUILD b/pkg/kubelet/apis/config/v1beta1/BUILD
index 66bc0bdd0d8d..05d5f34c8b6e 100644
--- a/pkg/kubelet/apis/config/v1beta1/BUILD
+++ b/pkg/kubelet/apis/config/v1beta1/BUILD
@@ -19,10 +19,12 @@ go_library(
     ],
     importpath = "k8s.io/kubernetes/pkg/kubelet/apis/config/v1beta1",
     deps = [
+        "//pkg/apis/core/v1:go_default_library",
        "//pkg/cluster/ports:go_default_library",
        "//pkg/kubelet/apis/config:go_default_library",
        "//pkg/kubelet/qos:go_default_library",
        "//pkg/kubelet/types:go_default_library",
+        "//staging/src/k8s.io/api/core/v1:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/conversion:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
diff --git a/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go b/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go
index d0865b25746c..a1c7ddcfca25 100644
--- a/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go
+++ b/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go
@@ -23,6 +23,7 @@ package v1beta1
 
 import (
 	unsafe "unsafe"
 
+	corev1 "k8s.io/api/core/v1"
 	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	conversion "k8s.io/apimachinery/pkg/conversion"
 	runtime "k8s.io/apimachinery/pkg/runtime"
@@ -108,6 +109,16 @@ func RegisterConversions(s 
*runtime.Scheme) error { }); err != nil { return err } + if err := s.AddGeneratedConversionFunc((*v1beta1.MemoryReservation)(nil), (*config.MemoryReservation)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1beta1_MemoryReservation_To_config_MemoryReservation(a.(*v1beta1.MemoryReservation), b.(*config.MemoryReservation), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*config.MemoryReservation)(nil), (*v1beta1.MemoryReservation)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_config_MemoryReservation_To_v1beta1_MemoryReservation(a.(*config.MemoryReservation), b.(*v1beta1.MemoryReservation), scope) + }); err != nil { + return err + } if err := s.AddGeneratedConversionFunc((*v1beta1.SerializedNodeConfigSource)(nil), (*config.SerializedNodeConfigSource)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1beta1_SerializedNodeConfigSource_To_config_SerializedNodeConfigSource(a.(*v1beta1.SerializedNodeConfigSource), b.(*config.SerializedNodeConfigSource), scope) }); err != nil { @@ -353,7 +364,7 @@ func autoConvert_v1beta1_KubeletConfiguration_To_config_KubeletConfiguration(in } out.ShutdownGracePeriod = in.ShutdownGracePeriod out.ShutdownGracePeriodCriticalPods = in.ShutdownGracePeriodCriticalPods - out.ReservedMemory = *(*[]map[string]string)(unsafe.Pointer(&in.ReservedMemory)) + out.ReservedMemory = *(*[]config.MemoryReservation)(unsafe.Pointer(&in.ReservedMemory)) return nil } @@ -508,7 +519,7 @@ func autoConvert_config_KubeletConfiguration_To_v1beta1_KubeletConfiguration(in } out.ShutdownGracePeriod = in.ShutdownGracePeriod out.ShutdownGracePeriodCriticalPods = in.ShutdownGracePeriodCriticalPods - out.ReservedMemory = *(*[]map[string]string)(unsafe.Pointer(&in.ReservedMemory)) + out.ReservedMemory = *(*[]v1beta1.MemoryReservation)(unsafe.Pointer(&in.ReservedMemory)) return nil } @@ -585,6 +596,28 @@ func Convert_config_KubeletX509Authentication_To_v1beta1_KubeletX509Authenticati return autoConvert_config_KubeletX509Authentication_To_v1beta1_KubeletX509Authentication(in, out, s) } +func autoConvert_v1beta1_MemoryReservation_To_config_MemoryReservation(in *v1beta1.MemoryReservation, out *config.MemoryReservation, s conversion.Scope) error { + out.NumaNode = in.NumaNode + out.Limits = *(*corev1.ResourceList)(unsafe.Pointer(&in.Limits)) + return nil +} + +// Convert_v1beta1_MemoryReservation_To_config_MemoryReservation is an autogenerated conversion function. +func Convert_v1beta1_MemoryReservation_To_config_MemoryReservation(in *v1beta1.MemoryReservation, out *config.MemoryReservation, s conversion.Scope) error { + return autoConvert_v1beta1_MemoryReservation_To_config_MemoryReservation(in, out, s) +} + +func autoConvert_config_MemoryReservation_To_v1beta1_MemoryReservation(in *config.MemoryReservation, out *v1beta1.MemoryReservation, s conversion.Scope) error { + out.NumaNode = in.NumaNode + out.Limits = *(*corev1.ResourceList)(unsafe.Pointer(&in.Limits)) + return nil +} + +// Convert_config_MemoryReservation_To_v1beta1_MemoryReservation is an autogenerated conversion function. 
+func Convert_config_MemoryReservation_To_v1beta1_MemoryReservation(in *config.MemoryReservation, out *v1beta1.MemoryReservation, s conversion.Scope) error { + return autoConvert_config_MemoryReservation_To_v1beta1_MemoryReservation(in, out, s) +} + func autoConvert_v1beta1_SerializedNodeConfigSource_To_config_SerializedNodeConfigSource(in *v1beta1.SerializedNodeConfigSource, out *config.SerializedNodeConfigSource, s conversion.Scope) error { out.Source = in.Source return nil diff --git a/pkg/kubelet/apis/config/v1beta1/zz_generated.defaults.go b/pkg/kubelet/apis/config/v1beta1/zz_generated.defaults.go index 7c127d46e2f3..8a4efba40b0d 100644 --- a/pkg/kubelet/apis/config/v1beta1/zz_generated.defaults.go +++ b/pkg/kubelet/apis/config/v1beta1/zz_generated.defaults.go @@ -23,6 +23,7 @@ package v1beta1 import ( runtime "k8s.io/apimachinery/pkg/runtime" v1beta1 "k8s.io/kubelet/config/v1beta1" + v1 "k8s.io/kubernetes/pkg/apis/core/v1" ) // RegisterDefaults adds defaulters functions to the given scheme. @@ -35,4 +36,8 @@ func RegisterDefaults(scheme *runtime.Scheme) error { func SetObjectDefaults_KubeletConfiguration(in *v1beta1.KubeletConfiguration) { SetDefaults_KubeletConfiguration(in) + for i := range in.ReservedMemory { + a := &in.ReservedMemory[i] + v1.SetDefaults_ResourceList(&a.Limits) + } } diff --git a/pkg/kubelet/apis/config/zz_generated.deepcopy.go b/pkg/kubelet/apis/config/zz_generated.deepcopy.go index 5dc85843b6ef..6c78450a5efd 100644 --- a/pkg/kubelet/apis/config/zz_generated.deepcopy.go +++ b/pkg/kubelet/apis/config/zz_generated.deepcopy.go @@ -21,6 +21,7 @@ limitations under the License. package config import ( + corev1 "k8s.io/api/core/v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" ) @@ -275,15 +276,9 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) { out.ShutdownGracePeriodCriticalPods = in.ShutdownGracePeriodCriticalPods if in.ReservedMemory != nil { in, out := &in.ReservedMemory, &out.ReservedMemory - *out = make([]map[string]string, len(*in)) + *out = make([]MemoryReservation, len(*in)) for i := range *in { - if (*in)[i] != nil { - in, out := &(*in)[i], &(*out)[i] - *out = make(map[string]string, len(*in)) - for key, val := range *in { - (*out)[key] = val - } - } + (*in)[i].DeepCopyInto(&(*out)[i]) } } return @@ -358,6 +353,29 @@ func (in *KubeletX509Authentication) DeepCopy() *KubeletX509Authentication { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MemoryReservation) DeepCopyInto(out *MemoryReservation) { + *out = *in + if in.Limits != nil { + in, out := &in.Limits, &out.Limits + *out = make(corev1.ResourceList, len(*in)) + for key, val := range *in { + (*out)[key] = val.DeepCopy() + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MemoryReservation. +func (in *MemoryReservation) DeepCopy() *MemoryReservation { + if in == nil { + return nil + } + out := new(MemoryReservation) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
 func (in *SerializedNodeConfigSource) DeepCopyInto(out *SerializedNodeConfigSource) {
 	*out = *in
diff --git a/staging/src/k8s.io/kubelet/config/v1beta1/types.go b/staging/src/k8s.io/kubelet/config/v1beta1/types.go
index c75d129f2356..4ae08f27c4b7 100644
--- a/staging/src/k8s.io/kubelet/config/v1beta1/types.go
+++ b/staging/src/k8s.io/kubelet/config/v1beta1/types.go
@@ -838,14 +838,22 @@ type KubeletConfiguration struct {
 	// Default: "10s"
 	// +optional
 	ShutdownGracePeriodCriticalPods metav1.Duration `json:"shutdownGracePeriodCriticalPods,omitempty"`
-	// A comma separated list of bracket-enclosed configurations for memory manager.
-	// Each configuration describes pre-reserved memory for the certain memory type on a specific NUMA node.
-	// The Memory Manager validates whether total amount of pre-reserved memory is identical to reserved-memory by the Node Allocatable feature.
-	// The format is {numa-node=integer, memory-type=string, limit=string}
-	// (e.g. {numa-node=0, type=memory, limit=1Gi}, {numa-node=1, type=memory, limit=1Gi})
+	// ReservedMemory specifies a comma-separated list of memory reservations for NUMA nodes.
+	// The parameter is relevant only in the context of the memory manager feature. The memory manager will not allocate reserved memory for container workloads.
+	// For example, if NUMA node 0 has 10Gi of memory and ReservedMemory reserves 1Gi of memory on NUMA node 0,
+	// the memory manager will assume that only 9Gi is available for allocation.
+	// You can specify reservations for multiple NUMA nodes and memory types.
+	// You can omit this parameter entirely, but you should be aware that the amount of reserved memory across all NUMA nodes
+	// should be equal to the amount of memory specified by the node allocatable feature (https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/#node-allocatable).
+	// If at least one node allocatable parameter has a non-zero value, you will need to specify at least one NUMA node.
+	// Also, avoid specifying:
+	// 1. Duplicates, i.e. the same NUMA node and memory type, but with a different value.
+	// 2. Zero limits for any memory type.
+	// 3. NUMA node IDs that do not exist on the machine.
+	// 4. Memory types other than memory and hugepages-<size>.
 	// Default: nil
 	// +optional
-	ReservedMemory []map[string]string `json:"reservedMemory,omitempty"`
+	ReservedMemory []MemoryReservation `json:"reservedMemory,omitempty"`
 }

 type KubeletAuthorizationMode string
@@ -926,3 +934,9 @@ type SerializedNodeConfigSource struct {
 	// +optional
 	Source v1.NodeConfigSource `json:"source,omitempty" protobuf:"bytes,1,opt,name=source"`
 }
+
+// MemoryReservation specifies the memory reservation of different types for each NUMA node
+type MemoryReservation struct {
+	NumaNode int32           `json:"numaNode"`
+	Limits   v1.ResourceList `json:"limits"`
+}
diff --git a/staging/src/k8s.io/kubelet/config/v1beta1/zz_generated.deepcopy.go b/staging/src/k8s.io/kubelet/config/v1beta1/zz_generated.deepcopy.go
index a6c7f56023ef..0cca1feecde1 100644
--- a/staging/src/k8s.io/kubelet/config/v1beta1/zz_generated.deepcopy.go
+++ b/staging/src/k8s.io/kubelet/config/v1beta1/zz_generated.deepcopy.go
@@ -21,6 +21,7 @@ limitations under the License.
 package v1beta1

 import (
+	corev1 "k8s.io/api/core/v1"
 	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	runtime "k8s.io/apimachinery/pkg/runtime"
 )
@@ -305,15 +306,9 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) {
 	out.ShutdownGracePeriodCriticalPods = in.ShutdownGracePeriodCriticalPods
 	if in.ReservedMemory != nil {
 		in, out := &in.ReservedMemory, &out.ReservedMemory
-		*out = make([]map[string]string, len(*in))
+		*out = make([]MemoryReservation, len(*in))
 		for i := range *in {
-			if (*in)[i] != nil {
-				in, out := &(*in)[i], &(*out)[i]
-				*out = make(map[string]string, len(*in))
-				for key, val := range *in {
-					(*out)[key] = val
-				}
-			}
+			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
 	}
 	return
@@ -393,6 +388,29 @@ func (in *KubeletX509Authentication) DeepCopy() *KubeletX509Authentication {
 	return out
 }

+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *MemoryReservation) DeepCopyInto(out *MemoryReservation) {
+	*out = *in
+	if in.Limits != nil {
+		in, out := &in.Limits, &out.Limits
+		*out = make(corev1.ResourceList, len(*in))
+		for key, val := range *in {
+			(*out)[key] = val.DeepCopy()
+		}
+	}
+	return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MemoryReservation.
+func (in *MemoryReservation) DeepCopy() *MemoryReservation {
+	if in == nil {
+		return nil
+	}
+	out := new(MemoryReservation)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *SerializedNodeConfigSource) DeepCopyInto(out *SerializedNodeConfigSource) {
 	*out = *in

From 7561a0f96e4b2f34685e2cfe6540014d51808a44 Mon Sep 17 00:00:00 2001
From: Artyom Lukianov
Date: Wed, 18 Nov 2020 00:19:36 +0200
Subject: [PATCH 28/31] memory manager: provide new flag var to parse
 reserved-memory parameter

The new flag parses the `--reserved-memory` value directly into the
[]kubeletconfig.MemoryReservation variable instead of going through an
intermediate map representation. This lets us get rid of a lot of unneeded
code and use a single representation for the reserved memory.
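For illustration only (the values below mirror the unit tests added in this
patch and are not a recommendation), a reservation of 1Gi of conventional
memory plus 1Gi of 1Gi hugepages on NUMA node 0, and 1Gi of memory on NUMA
node 1, can now be written as:

    --reserved-memory 0:memory=1Gi,hugepages-1Gi=1Gi --reserved-memory 1:memory=1Gi

which the new flag var parses directly into:

    // illustrative sketch; mirrors the expectations in pkg/util/flag/flags_test.go
    []kubeletconfig.MemoryReservation{
    	{NumaNode: 0, Limits: v1.ResourceList{"memory": resource.MustParse("1Gi"), "hugepages-1Gi": resource.MustParse("1Gi")}},
    	{NumaNode: 1, Limits: v1.ResourceList{"memory": resource.MustParse("1Gi")}},
    }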
Signed-off-by: Artyom Lukianov
---
 cmd/kubelet/app/BUILD                         |   1 -
 cmd/kubelet/app/options/options.go            |   2 +-
 cmd/kubelet/app/server.go                     |  61 +-----
 cmd/kubelet/app/server_test.go                |  57 ------
 pkg/kubelet/cm/BUILD                          |   4 +-
 pkg/kubelet/cm/container_manager.go           |  11 +-
 pkg/util/flag/BUILD                           |  12 +-
 pkg/util/flag/flags.go                        | 104 ++++++++++
 pkg/util/flag/flags_test.go                   | 124 ++++++++++++
 .../src/k8s.io/component-base/cli/flag/BUILD  |   2 -
 ...acket_separated_slice_map_string_string.go | 111 -----------
 ..._separated_slice_map_string_string_test.go | 178 ------------------
 12 files changed, 249 insertions(+), 418 deletions(-)
 delete mode 100644 staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string.go
 delete mode 100644 staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string_test.go

diff --git a/cmd/kubelet/app/BUILD b/cmd/kubelet/app/BUILD
index 89826ffa84cc..bfd511422e25 100644
--- a/cmd/kubelet/app/BUILD
+++ b/cmd/kubelet/app/BUILD
@@ -18,7 +18,6 @@ go_library(
         "//cmd/kubelet/app/options:go_default_library",
         "//pkg/api/legacyscheme:go_default_library",
         "//pkg/apis/core:go_default_library",
-        "//pkg/apis/core/v1/helper:go_default_library",
         "//pkg/capabilities:go_default_library",
         "//pkg/cloudprovider/providers:go_default_library",
         "//pkg/credentialprovider:go_default_library",
diff --git a/cmd/kubelet/app/options/options.go b/cmd/kubelet/app/options/options.go
index 471d083f5a63..0caa42c98307 100644
--- a/cmd/kubelet/app/options/options.go
+++ b/cmd/kubelet/app/options/options.go
@@ -554,5 +554,5 @@ Runtime log sanitization may introduce significant computation overhead and ther
 	// Memory Manager Flags
 	fs.StringVar(&c.MemoryManagerPolicy, "memory-manager-policy", c.MemoryManagerPolicy, "Memory Manager policy to use. Possible values: 'none', 'static'. Default: 'none'")
 	// TODO: once documentation link is available, replace KEP link with the documentation one.
-	fs.Var(cliflag.NewBracketSeparatedSliceMapStringString(&c.ReservedMemory), "reserved-memory", "A comma separated list of bracket-enclosed configuration for memory manager (e.g. {numa-node=0, type=memory, limit=1Gi}, {numa-node=1, type=memory, limit=1Gi}). The total sum for each memory type should be equal to the sum of kube-reserved, system-reserved and eviction-threshold. See more details under https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/1769-memory-manager#reserved-memory-flag")
+	fs.Var(&utilflag.ReservedMemoryVar{Value: &c.ReservedMemory}, "reserved-memory", "A comma-separated list of memory reservations for NUMA nodes. (e.g. --reserved-memory 0:memory=1Gi,hugepages-2Mi=2Gi --reserved-memory 1:memory=2Gi). The total sum for each memory type should be equal to the sum of kube-reserved, system-reserved and eviction-threshold.
See more details under https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/1769-memory-manager#reserved-memory-flag") } diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index 42f0167ea1dc..79908ff608a6 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -71,7 +71,6 @@ import ( "k8s.io/kubernetes/cmd/kubelet/app/options" "k8s.io/kubernetes/pkg/api/legacyscheme" api "k8s.io/kubernetes/pkg/apis/core" - corev1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" "k8s.io/kubernetes/pkg/capabilities" "k8s.io/kubernetes/pkg/credentialprovider" "k8s.io/kubernetes/pkg/features" @@ -689,11 +688,6 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend klog.Infof("After cpu setting is overwritten, KubeReserved=\"%v\", SystemReserved=\"%v\"", s.KubeReserved, s.SystemReserved) } - reservedMemory, err := parseReservedMemoryConfig(s.ReservedMemory) - if err != nil { - return err - } - kubeReserved, err := parseResourceList(s.KubeReserved) if err != nil { return err @@ -743,7 +737,7 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend ExperimentalCPUManagerPolicy: s.CPUManagerPolicy, ExperimentalCPUManagerReconcilePeriod: s.CPUManagerReconcilePeriod.Duration, ExperimentalMemoryManagerPolicy: s.MemoryManagerPolicy, - ExperimentalMemoryManagerReservedMemory: reservedMemory, + ExperimentalMemoryManagerReservedMemory: s.ReservedMemory, ExperimentalPodPidsLimit: s.PodPidsLimit, EnforceCPULimits: s.CPUCFSQuota, CPUCFSQuotaPeriod: s.CPUCFSQuotaPeriod.Duration, @@ -1305,59 +1299,6 @@ func parseResourceList(m map[string]string) (v1.ResourceList, error) { return rl, nil } -func parseReservedMemoryConfig(config []map[string]string) (kubetypes.NUMANodeResources, error) { - if len(config) == 0 { - return nil, nil - } - - const ( - indexKey = "numa-node" - typeKey = "type" - limitKey = "limit" - ) - - keys := []string{indexKey, typeKey, limitKey} - - // check whether all keys are present - for _, m := range config { - for _, key := range keys { - if _, exist := m[key]; !exist { - return nil, fmt.Errorf("key: %s is missing in given ReservedMemory flag: %v", key, config) - } - } - } - - parsed := make(kubetypes.NUMANodeResources, len(config)) - for _, m := range config { - idxInString, _ := m[indexKey] - idx, err := strconv.Atoi(idxInString) - if err != nil || idx < 0 { - return nil, fmt.Errorf("NUMA index conversion error for value: \"%s\"", idxInString) - } - - typeInString, _ := m[typeKey] - v1Type := v1.ResourceName(typeInString) - if v1Type != v1.ResourceMemory && !corev1helper.IsHugePageResourceName(v1Type) { - return nil, fmt.Errorf("memory type conversion error, unknown type: \"%s\"", typeInString) - } - if corev1helper.IsHugePageResourceName(v1Type) { - if _, err := corev1helper.HugePageSizeFromResourceName(v1Type); err != nil { - return nil, fmt.Errorf("memory type conversion error, unknown type: \"%s\"", typeInString) - } - } - - limitInString, _ := m[limitKey] - limit, err := resource.ParseQuantity(limitInString) - if err != nil || limit.Sign() != 1 { - return nil, fmt.Errorf("memory limit conversion error for value \"%s\"", limitInString) - } - parsed[idx] = make(map[v1.ResourceName]resource.Quantity) - parsed[idx][v1Type] = limit - } - - return parsed, nil -} - // BootstrapKubeletConfigController constructs and bootstrap a configuration controller func BootstrapKubeletConfigController(dynamicConfigDir string, transform dynamickubeletconfig.TransformFunc) 
(*kubeletconfiginternal.KubeletConfiguration, *dynamickubeletconfig.Controller, error) { if !utilfeature.DefaultFeatureGate.Enabled(features.DynamicKubeletConfig) { diff --git a/cmd/kubelet/app/server_test.go b/cmd/kubelet/app/server_test.go index c80d1fd4d272..1db214ab954b 100644 --- a/cmd/kubelet/app/server_test.go +++ b/cmd/kubelet/app/server_test.go @@ -61,60 +61,3 @@ func TestValueOfAllocatableResources(t *testing.T) { } } } - -func TestValueOfReservedMemoryConfig(t *testing.T) { - testCases := []struct { - config []map[string]string - errorExpected bool - name string - }{ - { - config: []map[string]string{{"numa-node": "0", "type": "memory", "limit": "2Gi"}}, - errorExpected: false, - name: "Valid resource quantity", - }, - { - config: []map[string]string{{"numa-node": "0", "type": "memory", "limit": "2000m"}, {"numa-node": "1", "type": "memory", "limit": "1Gi"}}, - errorExpected: false, - name: "Valid resource quantity", - }, - { - config: []map[string]string{{"type": "memory", "limit": "2Gi"}}, - errorExpected: true, - name: "Missing key", - }, - { - config: []map[string]string{{"numa-node": "one", "type": "memory", "limit": "2Gi"}}, - errorExpected: true, - name: "Wrong 'numa-node' value", - }, - { - config: []map[string]string{{"numa-node": "0", "type": "not-memory", "limit": "2Gi"}}, - errorExpected: true, - name: "Wrong 'memory' value", - }, - { - config: []map[string]string{{"numa-node": "0", "type": "memory", "limit": "2Gigs"}}, - errorExpected: true, - name: "Wrong 'limit' value", - }, - { - config: []map[string]string{{"numa-node": "-1", "type": "memory", "limit": "2Gigs"}}, - errorExpected: true, - name: "Invalid 'numa-node' number", - }, - } - - for _, test := range testCases { - _, err := parseReservedMemoryConfig(test.config) - if test.errorExpected { - if err == nil { - t.Errorf("%s: error expected", test.name) - } - } else { - if err != nil { - t.Errorf("%s: unexpected error: %v", test.name, err) - } - } - } -} diff --git a/pkg/kubelet/cm/BUILD b/pkg/kubelet/cm/BUILD index 4ac0d10c67bc..e90df47ce01a 100644 --- a/pkg/kubelet/cm/BUILD +++ b/pkg/kubelet/cm/BUILD @@ -31,6 +31,7 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/features:go_default_library", + "//pkg/kubelet/apis/config:go_default_library", "//pkg/kubelet/cm/cpumanager:go_default_library", "//pkg/kubelet/cm/cpuset:go_default_library", "//pkg/kubelet/cm/memorymanager:go_default_library", @@ -41,7 +42,6 @@ go_library( "//pkg/kubelet/lifecycle:go_default_library", "//pkg/kubelet/pluginmanager/cache:go_default_library", "//pkg/kubelet/status:go_default_library", - "//pkg/kubelet/types:go_default_library", "//pkg/scheduler/framework:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", @@ -70,6 +70,7 @@ go_library( "//pkg/kubelet/metrics:go_default_library", "//pkg/kubelet/qos:go_default_library", "//pkg/kubelet/stats/pidlimit:go_default_library", + "//pkg/kubelet/types:go_default_library", "//pkg/util/oom:go_default_library", "//pkg/util/procfs:go_default_library", "//pkg/util/sysctl:go_default_library", @@ -130,6 +131,7 @@ go_library( "//pkg/kubelet/metrics:go_default_library", "//pkg/kubelet/qos:go_default_library", "//pkg/kubelet/stats/pidlimit:go_default_library", + "//pkg/kubelet/types:go_default_library", "//pkg/util/oom:go_default_library", "//pkg/util/procfs:go_default_library", "//pkg/util/sysctl:go_default_library", diff --git a/pkg/kubelet/cm/container_manager.go 
b/pkg/kubelet/cm/container_manager.go index d72c16630eec..e4a710947187 100644 --- a/pkg/kubelet/cm/container_manager.go +++ b/pkg/kubelet/cm/container_manager.go @@ -17,6 +17,9 @@ limitations under the License. package cm import ( + "fmt" + "strconv" + "strings" "time" "k8s.io/apimachinery/pkg/util/sets" @@ -24,6 +27,7 @@ import ( v1 "k8s.io/api/core/v1" internalapi "k8s.io/cri-api/pkg/apis" podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1" + kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" "k8s.io/kubernetes/pkg/kubelet/cm/cpuset" "k8s.io/kubernetes/pkg/kubelet/config" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" @@ -31,12 +35,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache" "k8s.io/kubernetes/pkg/kubelet/status" - kubetypes "k8s.io/kubernetes/pkg/kubelet/types" schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" - - "fmt" - "strconv" - "strings" ) type ActivePodsFunc func() []*v1.Pod @@ -137,7 +136,7 @@ type NodeConfig struct { ExperimentalTopologyManagerScope string ExperimentalCPUManagerReconcilePeriod time.Duration ExperimentalMemoryManagerPolicy string - ExperimentalMemoryManagerReservedMemory kubetypes.NUMANodeResources + ExperimentalMemoryManagerReservedMemory []kubeletconfig.MemoryReservation ExperimentalPodPidsLimit int64 EnforceCPULimits bool CPUCFSQuotaPeriod time.Duration diff --git a/pkg/util/flag/BUILD b/pkg/util/flag/BUILD index 4e5ac790111b..4f8c6b4e94bd 100644 --- a/pkg/util/flag/BUILD +++ b/pkg/util/flag/BUILD @@ -11,6 +11,10 @@ go_library( srcs = ["flags.go"], importpath = "k8s.io/kubernetes/pkg/util/flag", deps = [ + "//pkg/apis/core/v1/helper:go_default_library", + "//pkg/kubelet/apis/config:go_default_library", + "//staging/src/k8s.io/api/core/v1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/net:go_default_library", "//vendor/github.com/spf13/pflag:go_default_library", "//vendor/k8s.io/utils/net:go_default_library", @@ -34,5 +38,11 @@ go_test( name = "go_default_test", srcs = ["flags_test.go"], embed = [":go_default_library"], - deps = ["//vendor/github.com/spf13/pflag:go_default_library"], + deps = [ + "//pkg/kubelet/apis/config:go_default_library", + "//staging/src/k8s.io/api/core/v1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/api/equality:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", + "//vendor/github.com/spf13/pflag:go_default_library", + ], ) diff --git a/pkg/util/flag/flags.go b/pkg/util/flag/flags.go index 7c489e93ab37..9a4241d67bbb 100644 --- a/pkg/util/flag/flags.go +++ b/pkg/util/flag/flags.go @@ -19,10 +19,17 @@ package flag import ( "fmt" "net" + "sort" + "strconv" + "strings" "github.com/spf13/pflag" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" utilnet "k8s.io/apimachinery/pkg/util/net" + corev1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" + kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" utilsnet "k8s.io/utils/net" ) @@ -32,6 +39,7 @@ var ( _ pflag.Value = &IPVar{} _ pflag.Value = &IPPortVar{} _ pflag.Value = &PortRangeVar{} + _ pflag.Value = &ReservedMemoryVar{} ) // IPVar is used for validating a command line option that represents an IP. 
It implements the pflag.Value interface
@@ -151,3 +159,95 @@ func (v PortRangeVar) String() string {
 func (v PortRangeVar) Type() string {
 	return "port-range"
 }
+
+// ReservedMemoryVar is used for validating a command line option that represents a reserved memory. It implements the pflag.Value interface
+type ReservedMemoryVar struct {
+	Value       *[]kubeletconfig.MemoryReservation
+	initialized bool // set to true after the first Set call
+}
+
+// Set sets the flag value
+func (v *ReservedMemoryVar) Set(s string) error {
+	if v.Value == nil {
+		return fmt.Errorf("no target (nil pointer to *[]MemoryReservation)")
+	}
+
+	if s == "" {
+		v.Value = nil
+		return nil
+	}
+
+	if !v.initialized || *v.Value == nil {
+		*v.Value = make([]kubeletconfig.MemoryReservation, 0)
+		v.initialized = true
+	}
+
+	numaNodeReservation := strings.Split(s, ":")
+	if len(numaNodeReservation) != 2 {
+		return fmt.Errorf("the reserved memory has incorrect format, expected numaNodeID:type=quantity[,type=quantity...], got %s", s)
+	}
+
+	memoryTypeReservations := strings.Split(numaNodeReservation[1], ",")
+	if len(memoryTypeReservations) < 1 {
+		return fmt.Errorf("the reserved memory has incorrect format, expected numaNodeID:type=quantity[,type=quantity...], got %s", s)
+	}
+
+	numaNodeID, err := strconv.Atoi(numaNodeReservation[0])
+	if err != nil {
+		return fmt.Errorf("failed to convert the NUMA node ID, expected integer, got %s", numaNodeReservation[0])
+	}
+
+	memoryReservation := kubeletconfig.MemoryReservation{
+		NumaNode: int32(numaNodeID),
+		Limits:   map[v1.ResourceName]resource.Quantity{},
+	}
+
+	for _, reservation := range memoryTypeReservations {
+		limit := strings.Split(reservation, "=")
+		if len(limit) != 2 {
+			return fmt.Errorf("the reserved limit has incorrect value, expected type=quantity, got %s", reservation)
+		}
+
+		resourceName := v1.ResourceName(limit[0])
+		if resourceName != v1.ResourceMemory && !corev1helper.IsHugePageResourceName(resourceName) {
+			return fmt.Errorf("memory type conversion error, unknown type: %q", resourceName)
+		}
+
+		q, err := resource.ParseQuantity(limit[1])
+		if err != nil {
+			return fmt.Errorf("failed to parse the quantity, expected quantity, got %s", limit[1])
+		}
+
+		memoryReservation.Limits[resourceName] = q
+	}
+
+	*v.Value = append(*v.Value, memoryReservation)
+
+	return nil
+}
+
+// String returns the flag value
+func (v *ReservedMemoryVar) String() string {
+	if v == nil || v.Value == nil {
+		return ""
+	}
+
+	var slices []string
+	for _, reservedMemory := range *v.Value {
+		var limits []string
+		for resourceName, q := range reservedMemory.Limits {
+			limits = append(limits, fmt.Sprintf("%s=%s", resourceName, q.String()))
+		}
+
+		sort.Strings(limits)
+		slices = append(slices, fmt.Sprintf("%d:%s", reservedMemory.NumaNode, strings.Join(limits, ",")))
+	}
+
+	sort.Strings(slices)
+	return strings.Join(slices, ",")
+}
+
+// Type gets the flag type
+func (v *ReservedMemoryVar) Type() string {
+	return "reserved-memory"
+}
diff --git a/pkg/util/flag/flags_test.go b/pkg/util/flag/flags_test.go
index b60f8e0ea484..cd1edd0f5e81 100644
--- a/pkg/util/flag/flags_test.go
+++ b/pkg/util/flag/flags_test.go
@@ -17,10 +17,16 @@ limitations under the License.
 package flag

 import (
+	"fmt"
 	"strings"
 	"testing"

 	"github.com/spf13/pflag"
+
+	v1 "k8s.io/api/core/v1"
+	apiequality "k8s.io/apimachinery/pkg/api/equality"
+	"k8s.io/apimachinery/pkg/api/resource"
+	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
 )

 func TestIPVar(t *testing.T) {
@@ -163,3 +169,121 @@ func TestIPPortVar(t *testing.T) {
 		}
 	}
 }
+
+func TestReservedMemoryVar(t *testing.T) {
+	resourceNameHugepages1Gi := v1.ResourceName(fmt.Sprintf("%s1Gi", v1.ResourceHugePagesPrefix))
+	memory1Gi := resource.MustParse("1Gi")
+	testCases := []struct {
+		desc      string
+		argc      string
+		expectErr bool
+		expectVal []kubeletconfig.MemoryReservation
+	}{
+		{
+			desc: "valid input",
+			argc: "blah --reserved-memory=0:memory=1Gi",
+			expectVal: []kubeletconfig.MemoryReservation{
+				{
+					NumaNode: 0,
+					Limits: v1.ResourceList{
+						v1.ResourceMemory: memory1Gi,
+					},
+				},
+			},
+		},
+		{
+			desc: "valid input with multiple memory types",
+			argc: "blah --reserved-memory=0:memory=1Gi,hugepages-1Gi=1Gi",
+			expectVal: []kubeletconfig.MemoryReservation{
+				{
+					NumaNode: 0,
+					Limits: v1.ResourceList{
+						v1.ResourceMemory:        memory1Gi,
+						resourceNameHugepages1Gi: memory1Gi,
+					},
+				},
+			},
+		},
+		{
+			desc: "valid input with multiple reserved-memory arguments",
+			argc: "blah --reserved-memory=0:memory=1Gi,hugepages-1Gi=1Gi --reserved-memory=1:memory=1Gi",
+			expectVal: []kubeletconfig.MemoryReservation{
+				{
+					NumaNode: 0,
+					Limits: v1.ResourceList{
+						v1.ResourceMemory:        memory1Gi,
+						resourceNameHugepages1Gi: memory1Gi,
+					},
+				},
+				{
+					NumaNode: 1,
+					Limits: v1.ResourceList{
+						v1.ResourceMemory: memory1Gi,
+					},
+				},
+			},
+		},
+		{
+			desc:      "invalid input",
+			argc:      "blah --reserved-memory=bad-input",
+			expectVal: nil,
+			expectErr: true,
+		},
+		{
+			desc:      "invalid input without memory types",
+			argc:      "blah --reserved-memory=0:",
+			expectVal: nil,
+			expectErr: true,
+		},
+		{
+			desc:      "invalid input with non-integer NUMA node",
+			argc:      "blah --reserved-memory=a:memory=1Gi",
+			expectVal: nil,
+			expectErr: true,
+		},
+		{
+			desc:      "invalid input with invalid limit",
+			argc:      "blah --reserved-memory=0:memory=",
+			expectVal: nil,
+			expectErr: true,
+		},
+		{
+			desc:      "invalid input with invalid memory type",
+			argc:      "blah --reserved-memory=0:type=1Gi",
+			expectVal: nil,
+			expectErr: true,
+		},
+		{
+			desc:      "invalid input with invalid quantity",
+			argc:      "blah --reserved-memory=0:memory=1Be",
+			expectVal: nil,
+			expectErr: true,
+		},
+	}
+	for _, tc := range testCases {
+		fs := pflag.NewFlagSet("blah", pflag.PanicOnError)
+
+		var reservedMemory []kubeletconfig.MemoryReservation
+		fs.Var(&ReservedMemoryVar{Value: &reservedMemory}, "reserved-memory", "--reserved-memory 0:memory=1Gi,hugepages-2Mi=2Gi")
+
+		var err error
+		func() {
+			defer func() {
+				if r := recover(); r != nil {
+					err = r.(error)
+				}
+			}()
+			fs.Parse(strings.Split(tc.argc, " "))
+		}()
+
+		if tc.expectErr && err == nil {
+			t.Fatalf("%q: Did not observe an expected error", tc.desc)
+		}
+		if !tc.expectErr && err != nil {
+			t.Fatalf("%q: Observed an unexpected error: %v", tc.desc, err)
+		}
+		if !apiequality.Semantic.DeepEqual(reservedMemory, tc.expectVal) {
+			t.Fatalf("%q: Unexpected reserved-memory value: expected %v, saw %v", tc.desc, tc.expectVal, reservedMemory)
+		}
+	}
+}
diff --git a/staging/src/k8s.io/component-base/cli/flag/BUILD b/staging/src/k8s.io/component-base/cli/flag/BUILD
index 25d290f7e85e..9c52db989892 100644
--- a/staging/src/k8s.io/component-base/cli/flag/BUILD
+++ b/staging/src/k8s.io/component-base/cli/flag/BUILD
@@ -9,7 +9,6 @@ load(
 go_test(
     name = "go_default_test",
srcs = [ - "bracket_separated_slice_map_string_string_test.go", "ciphersuites_flag_test.go", "colon_separated_multimap_string_string_test.go", "langle_separated_map_string_string_test.go", @@ -25,7 +24,6 @@ go_test( go_library( name = "go_default_library", srcs = [ - "bracket_separated_slice_map_string_string.go", "ciphersuites_flag.go", "ciphersuites_flag_114.go", "colon_separated_multimap_string_string.go", diff --git a/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string.go b/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string.go deleted file mode 100644 index e3a99c872bdd..000000000000 --- a/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string.go +++ /dev/null @@ -1,111 +0,0 @@ -/* -Copyright 2020 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package flag - -import ( - "fmt" - "sort" - "strings" -) - -// BracketSeparatedSliceMapStringString can be set from the command line with the format `--flag {key=value, ...}, {...}`. -// Multiple comma-separated key-value pairs in brackets (`{}`) in a single invocation are supported. For example: `--flag {key=value, key=value, ...}`. -// Multiple bracket-separated list of key-value pairs in a single invocation are supported. For example: `--flag {key=value, key=value}, {key=value, key=value}`. 
-type BracketSeparatedSliceMapStringString struct { - Value *[]map[string]string - initialized bool // set to true after the first Set call -} - -// NewBracketSeparatedSliceMapStringString takes a pointer to a []map[string]string and returns the -// BracketSeparatedSliceMapStringString flag parsing shim for that map -func NewBracketSeparatedSliceMapStringString(m *[]map[string]string) *BracketSeparatedSliceMapStringString { - return &BracketSeparatedSliceMapStringString{Value: m} -} - -// Set implements github.com/spf13/pflag.Value -func (m *BracketSeparatedSliceMapStringString) Set(value string) error { - if m.Value == nil { - return fmt.Errorf("no target (nil pointer to []map[string]string)") - } - if !m.initialized || *m.Value == nil { - *m.Value = make([]map[string]string, 0) - m.initialized = true - } - - value = strings.TrimSpace(value) - - for _, split := range strings.Split(value, ",{") { - split = strings.TrimLeft(split, "{") - split = strings.TrimRight(split, "}") - - if len(split) == 0 { - continue - } - - // now we have "numa-node=1,memory-type=memory,limit=1Gi" - tmpRawMap := make(map[string]string) - - tmpMap := NewMapStringString(&tmpRawMap) - - if err := tmpMap.Set(split); err != nil { - return fmt.Errorf("could not parse String: (%s): %v", value, err) - } - - *m.Value = append(*m.Value, tmpRawMap) - } - - return nil -} - -// String implements github.com/spf13/pflag.Value -func (m *BracketSeparatedSliceMapStringString) String() string { - if m == nil || m.Value == nil { - return "" - } - - var slices []string - - for _, configMap := range *m.Value { - var tmpPairs []string - - var keys []string - for key := range configMap { - keys = append(keys, key) - } - sort.Strings(keys) - - for _, key := range keys { - tmpPairs = append(tmpPairs, fmt.Sprintf("%s=%s", key, configMap[key])) - } - - if len(tmpPairs) != 0 { - slices = append(slices, "{"+strings.Join(tmpPairs, ",")+"}") - } - } - sort.Strings(slices) - return strings.Join(slices, ",") -} - -// Type implements github.com/spf13/pflag.Value -func (*BracketSeparatedSliceMapStringString) Type() string { - return "BracketSeparatedSliceMapStringString" -} - -// Empty implements OmitEmpty -func (m *BracketSeparatedSliceMapStringString) Empty() bool { - return !m.initialized || m.Value == nil || len(*m.Value) == 0 -} diff --git a/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string_test.go b/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string_test.go deleted file mode 100644 index 84049d1ebc2b..000000000000 --- a/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string_test.go +++ /dev/null @@ -1,178 +0,0 @@ -/* -Copyright 2020 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package flag - -import ( - "reflect" - "testing" -) - -func TestStringBracketSeparatedSliceMapStringString(t *testing.T) { - var nilSliceMap []map[string]string - testCases := []struct { - desc string - m *BracketSeparatedSliceMapStringString - expect string - }{ - {"nil", NewBracketSeparatedSliceMapStringString(&nilSliceMap), ""}, - {"empty", NewBracketSeparatedSliceMapStringString(&[]map[string]string{}), ""}, - {"one key", NewBracketSeparatedSliceMapStringString(&[]map[string]string{{"a": "string"}}), "{a=string}"}, - {"two keys", NewBracketSeparatedSliceMapStringString(&[]map[string]string{{"a": "string", "b": "string"}}), "{a=string,b=string}"}, - } - for _, tc := range testCases { - t.Run(tc.desc, func(t *testing.T) { - str := tc.m.String() - if tc.expect != str { - t.Fatalf("expect %q but got %q", tc.expect, str) - } - }) - } -} - -func TestSetBracketSeparatedSliceMapStringString(t *testing.T) { - var nilMap []map[string]string - testCases := []struct { - desc string - vals []string - start *BracketSeparatedSliceMapStringString - expect *BracketSeparatedSliceMapStringString - err string - }{ - // we initialize the map with a default key that should be cleared by Set - {"clears defaults", []string{""}, - NewBracketSeparatedSliceMapStringString(&[]map[string]string{{"default": ""}}), - &BracketSeparatedSliceMapStringString{ - initialized: true, - Value: &[]map[string]string{}, - }, ""}, - // make sure we still allocate for "initialized" multimaps where Multimap was initially set to a nil map - {"allocates map if currently nil", []string{""}, - &BracketSeparatedSliceMapStringString{initialized: true, Value: &nilMap}, - &BracketSeparatedSliceMapStringString{ - initialized: true, - Value: &[]map[string]string{}, - }, ""}, - // for most cases, we just reuse nilMap, which should be allocated by Set, and is reset before each test case - {"empty", []string{""}, - NewBracketSeparatedSliceMapStringString(&nilMap), - &BracketSeparatedSliceMapStringString{ - initialized: true, - Value: &[]map[string]string{}, - }, ""}, - {"empty bracket", []string{"{}"}, - NewBracketSeparatedSliceMapStringString(&nilMap), - &BracketSeparatedSliceMapStringString{ - initialized: true, - Value: &[]map[string]string{}, - }, ""}, - {"missing bracket", []string{"a=string, b=string"}, - NewBracketSeparatedSliceMapStringString(&nilMap), - &BracketSeparatedSliceMapStringString{ - initialized: true, - Value: &[]map[string]string{{"a": "string", "b": "string"}}, - }, ""}, - {"empty key", []string{"{=string}"}, - NewBracketSeparatedSliceMapStringString(&nilMap), - &BracketSeparatedSliceMapStringString{ - initialized: true, - Value: &[]map[string]string{{"": "string"}}, - }, ""}, - {"one key", []string{"{a=string}"}, - NewBracketSeparatedSliceMapStringString(&nilMap), - &BracketSeparatedSliceMapStringString{ - initialized: true, - Value: &[]map[string]string{{"a": "string"}}, - }, ""}, - {"two keys", []string{"{a=string,b=string}"}, - NewBracketSeparatedSliceMapStringString(&nilMap), - &BracketSeparatedSliceMapStringString{ - initialized: true, - Value: &[]map[string]string{{"a": "string", "b": "string"}}, - }, ""}, - {"two duplicated keys", []string{"{a=string,a=string}"}, - NewBracketSeparatedSliceMapStringString(&nilMap), - &BracketSeparatedSliceMapStringString{ - initialized: true, - Value: &[]map[string]string{{"a": "string"}}, - }, ""}, - {"two keys with spaces", []string{"{a = string, b = string}"}, - NewBracketSeparatedSliceMapStringString(&nilMap), - &BracketSeparatedSliceMapStringString{ - initialized: true, 
-			Value:       &[]map[string]string{{"a": "string", "b": "string"}},
-		}, ""},
-		{"two keys, multiple Set invocations", []string{"{a=string, b=string}", "{a=string, b=string}"},
-			NewBracketSeparatedSliceMapStringString(&nilMap),
-			&BracketSeparatedSliceMapStringString{
-				initialized: true,
-				Value:       &[]map[string]string{{"a": "string", "b": "string"}, {"a": "string", "b": "string"}},
-			}, ""},
-		{"no target", []string{""},
-			NewBracketSeparatedSliceMapStringString(nil),
-			nil,
-			"no target (nil pointer to []map[string]string)"},
-	}
-	for _, tc := range testCases {
-		nilMap = nil
-		t.Run(tc.desc, func(t *testing.T) {
-			var err error
-			for _, val := range tc.vals {
-				err = tc.start.Set(val)
-				if err != nil {
-					break
-				}
-			}
-			if tc.err != "" {
-				if err == nil || err.Error() != tc.err {
-					t.Fatalf("expect error %s but got %v", tc.err, err)
-				}
-				return
-			} else if err != nil {
-				t.Fatalf("unexpected error: %v", err)
-			}
-			if !reflect.DeepEqual(tc.expect, tc.start) {
-				t.Fatalf("expect %#v but got %#v", tc.expect, tc.start)
-			}
-		})
-	}
-}
-
-func TestEmptyBracketSeparatedSliceMapStringString(t *testing.T) {
-	var nilSliceMap []map[string]string
-	notEmpty := &BracketSeparatedSliceMapStringString{
-		Value:       &[]map[string]string{{"a": "int", "b": "string", "c": "string"}},
-		initialized: true,
-	}
-
-	testCases := []struct {
-		desc   string
-		m      *BracketSeparatedSliceMapStringString
-		expect bool
-	}{
-		{"nil", NewBracketSeparatedSliceMapStringString(&nilSliceMap), true},
-		{"empty", NewBracketSeparatedSliceMapStringString(&[]map[string]string{}), true},
-		{"populated", notEmpty, false},
-	}
-	for _, tc := range testCases {
-		t.Run(tc.desc, func(t *testing.T) {
-			ret := tc.m.Empty()
-			if ret != tc.expect {
-				t.Fatalf("expect %t but got %t", tc.expect, ret)
-			}
-		})
-	}
-}

From e8ea461bfdafd09747bf6e1ed37743f261f28968 Mon Sep 17 00:00:00 2001
From: Artyom Lukianov
Date: Wed, 18 Nov 2020 00:22:43 +0200
Subject: [PATCH 29/31] memory manager: update all relevant parts of the code
 to use []MemoryReservation

Signed-off-by: Artyom Lukianov
---
 pkg/features/kube_features.go                 |   2 +-
 pkg/kubelet/cm/memorymanager/BUILD            |   4 +-
 .../cm/memorymanager/memory_manager.go        |  49 +++--
 .../cm/memorymanager/memory_manager_test.go   | 202 +++++++++++++-----
 pkg/kubelet/types/BUILD                       |   1 -
 pkg/kubelet/types/types.go                    |   4 -
 test/e2e_node/memory_manager_test.go          |  22 +-
 7 files changed, 186 insertions(+), 98 deletions(-)

diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go
index 0801411b39c7..b22bc6659c41 100644
--- a/pkg/features/kube_features.go
+++ b/pkg/features/kube_features.go
@@ -126,7 +126,7 @@ const (
 	// owner: @cynepco3hahue(alukiano) @cezaryzukowski @k-wiatrzyk
 	// alpha: v1.20
-	// Allows to set containers memory affinity according to NUMA topology
+	// Allows setting memory affinity for a container based on NUMA topology
 	MemoryManager featuregate.Feature = "MemoryManager"

 	// owner: @sjenning
diff --git a/pkg/kubelet/cm/memorymanager/BUILD b/pkg/kubelet/cm/memorymanager/BUILD
index 4d0552e2a826..2fd8286057c5 100644
--- a/pkg/kubelet/cm/memorymanager/BUILD
+++ b/pkg/kubelet/cm/memorymanager/BUILD
@@ -14,13 +14,13 @@ go_library(
     deps = [
         "//pkg/apis/core/v1/helper:go_default_library",
         "//pkg/apis/core/v1/helper/qos:go_default_library",
+        "//pkg/kubelet/apis/config:go_default_library",
         "//pkg/kubelet/cm/containermap:go_default_library",
         "//pkg/kubelet/cm/memorymanager/state:go_default_library",
         "//pkg/kubelet/cm/topologymanager:go_default_library",
         "//pkg/kubelet/cm/topologymanager/bitmask:go_default_library",
"//pkg/kubelet/config:go_default_library", "//pkg/kubelet/status:go_default_library", - "//pkg/kubelet/types:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", "//staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2:go_default_library", @@ -37,12 +37,12 @@ go_test( ], embed = [":go_default_library"], deps = [ + "//pkg/kubelet/apis/config:go_default_library", "//pkg/kubelet/cm/containermap:go_default_library", "//pkg/kubelet/cm/memorymanager/state:go_default_library", "//pkg/kubelet/cm/topologymanager:go_default_library", "//pkg/kubelet/cm/topologymanager/bitmask:go_default_library", "//pkg/kubelet/lifecycle:go_default_library", - "//pkg/kubelet/types:go_default_library", "//pkg/kubelet/util/format:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", diff --git a/pkg/kubelet/cm/memorymanager/memory_manager.go b/pkg/kubelet/cm/memorymanager/memory_manager.go index 4929eaa2699d..7840d6d0b25f 100644 --- a/pkg/kubelet/cm/memorymanager/memory_manager.go +++ b/pkg/kubelet/cm/memorymanager/memory_manager.go @@ -29,12 +29,12 @@ import ( runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" "k8s.io/klog/v2" corev1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" + kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" "k8s.io/kubernetes/pkg/kubelet/cm/containermap" "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/config" "k8s.io/kubernetes/pkg/kubelet/status" - kubetypes "k8s.io/kubernetes/pkg/kubelet/types" ) // memoryManagerStateFileName is the file name where memory manager stores its state @@ -67,7 +67,7 @@ type Manager interface { Allocate(pod *v1.Pod, container *v1.Container) error // RemoveContainer is called after Kubelet decides to kill or delete a - // container. After this call, any memory allocated to the container are freed. + // container. After this call, any memory allocated to the container is freed. RemoveContainer(containerID string) error // State returns a read-only interface to the internal memory manager state. 
@@ -119,7 +119,7 @@ type manager struct {
 var _ Manager = &manager{}

 // NewManager returns a new instance of the memory manager
-func NewManager(policyName string, machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, reservedMemory kubetypes.NUMANodeResources, stateFileDirectory string, affinity topologymanager.Store) (Manager, error) {
+func NewManager(policyName string, machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, reservedMemory []kubeletconfig.MemoryReservation, stateFileDirectory string, affinity topologymanager.Store) (Manager, error) {
 	var policy Policy

 	switch policyType(policyName) {
@@ -321,7 +321,7 @@ func (m *manager) policyRemoveContainerByRef(podUID string, containerName string
 	return err
 }

-func getTotalMemoryTypeReserved(machineInfo *cadvisorapi.MachineInfo, reservedMemory kubetypes.NUMANodeResources) map[v1.ResourceName]resource.Quantity {
+func getTotalMemoryTypeReserved(machineInfo *cadvisorapi.MachineInfo, reservedMemory []kubeletconfig.MemoryReservation) map[v1.ResourceName]resource.Quantity {
 	totalMemoryType := map[v1.ResourceName]resource.Quantity{}

 	numaNodes := map[int]bool{}
@@ -329,24 +329,24 @@ func getTotalMemoryTypeReserved(machineInfo *cadvisorapi.MachineInfo, reservedMe
 		numaNodes[numaNode.Id] = true
 	}

-	for nodeID, node := range reservedMemory {
-		if !numaNodes[nodeID] {
-			klog.Warningf("The NUMA node %d specified under --reserved-memory does not exist on the machine", nodeID)
+	for _, reservation := range reservedMemory {
+		if !numaNodes[int(reservation.NumaNode)] {
+			klog.Warningf("The NUMA node %d specified under --reserved-memory does not exist on the machine", reservation.NumaNode)
 			continue
 		}

-		for memType, memVal := range node {
-			if totalMem, exists := totalMemoryType[memType]; exists {
-				memVal.Add(totalMem)
+		for resourceName, q := range reservation.Limits {
+			if value, ok := totalMemoryType[resourceName]; ok {
+				q.Add(value)
 			}
-			totalMemoryType[memType] = memVal
+			totalMemoryType[resourceName] = q
 		}
 	}

 	return totalMemoryType
 }

-func validateReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, reservedMemory kubetypes.NUMANodeResources) error {
+func validateReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, reservedMemory []kubeletconfig.MemoryReservation) error {
 	totalMemoryType := getTotalMemoryTypeReserved(machineInfo, reservedMemory)

 	commonMemoryTypeSet := make(map[v1.ResourceName]bool)
@@ -382,32 +382,31 @@ func validateReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatabl
 	return nil
 }

-func convertReserved(machineInfo *cadvisorapi.MachineInfo, reservedMemory kubetypes.NUMANodeResources) (systemReservedMemory, error) {
-	preReservedMemoryConverted := make(map[int]map[v1.ResourceName]uint64)
+func convertReserved(machineInfo *cadvisorapi.MachineInfo, reservedMemory []kubeletconfig.MemoryReservation) (systemReservedMemory, error) {
+	reservedMemoryConverted := make(map[int]map[v1.ResourceName]uint64)
 	for _, node := range machineInfo.Topology {
-		preReservedMemoryConverted[node.Id] = make(map[v1.ResourceName]uint64)
+		reservedMemoryConverted[node.Id] = make(map[v1.ResourceName]uint64)
 	}

-	for numaIndex := range reservedMemory {
-		for memoryType := range reservedMemory[numaIndex] {
-			tmp := reservedMemory[numaIndex][memoryType]
-			if val, success := tmp.AsInt64(); success {
-				preReservedMemoryConverted[numaIndex][memoryType] = uint64(val)
-			} else {
+	for _, reservation := range reservedMemory {
+		for resourceName, q := range reservation.Limits {
+			val, success := q.AsInt64()
+			if !success {
 				return nil, fmt.Errorf("could not convert a variable of type Quantity to int64")
 			}
+			reservedMemoryConverted[int(reservation.NumaNode)][resourceName] = uint64(val)
 		}
 	}

-	return preReservedMemoryConverted, nil
+	return reservedMemoryConverted, nil
 }

-func getSystemReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, preReservedMemory kubetypes.NUMANodeResources) (systemReservedMemory, error) {
-	if err := validateReservedMemory(machineInfo, nodeAllocatableReservation, preReservedMemory); err != nil {
+func getSystemReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, reservedMemory []kubeletconfig.MemoryReservation) (systemReservedMemory, error) {
+	if err := validateReservedMemory(machineInfo, nodeAllocatableReservation, reservedMemory); err != nil {
 		return nil, err
 	}

-	reservedMemoryConverted, err := convertReserved(machineInfo, preReservedMemory)
+	reservedMemoryConverted, err := convertReserved(machineInfo, reservedMemory)
 	if err != nil {
 		return nil, err
 	}
diff --git a/pkg/kubelet/cm/memorymanager/memory_manager_test.go b/pkg/kubelet/cm/memorymanager/memory_manager_test.go
index e336b9297190..e1d0f20d86d4 100644
--- a/pkg/kubelet/cm/memorymanager/memory_manager_test.go
+++ b/pkg/kubelet/cm/memorymanager/memory_manager_test.go
@@ -24,6 +24,8 @@ import (
 	"strings"
 	"testing"

+	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
+
 	cadvisorapi "github.com/google/cadvisor/info/v1"
 	"github.com/stretchr/testify/assert"
 	v1 "k8s.io/api/core/v1"
@@ -34,7 +36,6 @@ import (
 	"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
 	"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state"
 	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
-	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
 )

 const (
@@ -59,7 +60,7 @@ type testMemoryManager struct {
 	nodeAllocatableReservation v1.ResourceList
 	policyName                 policyType
 	affinity                   topologymanager.Store
-	systemReservedMemory       kubetypes.NUMANodeResources
+	systemReservedMemory       []kubeletconfig.MemoryReservation
 	expectedHints              map[string][]topologymanager.TopologyHint
 	expectedReserved           systemReservedMemory
 	reserved                   systemReservedMemory
@@ -69,7 +70,7 @@ type testMemoryManager struct {
 }

 func returnPolicyByName(testCase testMemoryManager) Policy {
-	switch policyType(testCase.policyName) {
+	switch testCase.policyName {
 	case policyTypeMock:
 		return &mockPolicy{
 			err: fmt.Errorf("fake reg error"),
@@ -83,8 +84,6 @@ func returnPolicyByName(testCase testMemoryManager) Policy {
 	return nil
 }

-type nodeResources map[v1.ResourceName]resource.Quantity
-
 type mockPolicy struct {
 	err error
 }
@@ -158,22 +157,27 @@ func TestValidateReservedMemory(t *testing.T) {
 		description                string
 		nodeAllocatableReservation v1.ResourceList
 		machineInfo                *cadvisorapi.MachineInfo
-		systemReservedMemory       kubetypes.NUMANodeResources
+		systemReservedMemory       []kubeletconfig.MemoryReservation
 		expectedError              string
 	}{
 		{
 			"Node Allocatable not set, reserved not set",
 			v1.ResourceList{},
 			machineInfo,
-			kubetypes.NUMANodeResources{},
+			[]kubeletconfig.MemoryReservation{},
 			"",
 		},
 		{
 			"Node Allocatable set to zero, reserved set to zero",
 			v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(0, resource.DecimalSI)},
 			machineInfo,
-			kubetypes.NUMANodeResources{
-				0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(0, resource.DecimalSI)},
+			[]kubeletconfig.MemoryReservation{
+				{
+					NumaNode: 0,
+					Limits: v1.ResourceList{
+
v1.ResourceMemory: *resource.NewQuantity(0, resource.DecimalSI), + }, + }, }, "", }, @@ -181,8 +185,13 @@ func TestValidateReservedMemory(t *testing.T) { "Node Allocatable not set (equal zero), reserved set", v1.ResourceList{}, machineInfo, - kubetypes.NUMANodeResources{ - 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI)}, + []kubeletconfig.MemoryReservation{ + { + NumaNode: 0, + Limits: v1.ResourceList{ + v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), + }, + }, }, fmt.Sprintf(msgNotEqual, v1.ResourceMemory), }, @@ -190,15 +199,20 @@ func TestValidateReservedMemory(t *testing.T) { "Node Allocatable set, reserved not set", v1.ResourceList{hugepages2M: *resource.NewQuantity(5, resource.DecimalSI)}, machineInfo, - kubetypes.NUMANodeResources{}, + []kubeletconfig.MemoryReservation{}, fmt.Sprintf(msgNotEqual, hugepages2M), }, { "Reserved not equal to Node Allocatable", v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI)}, machineInfo, - kubetypes.NUMANodeResources{ - 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI)}, + []kubeletconfig.MemoryReservation{ + { + NumaNode: 0, + Limits: v1.ResourceList{ + v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), + }, + }, }, fmt.Sprintf(msgNotEqual, v1.ResourceMemory), }, @@ -206,9 +220,19 @@ func TestValidateReservedMemory(t *testing.T) { "Reserved contains the NUMA node that does not exist under the machine", v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(17, resource.DecimalSI)}, machineInfo, - kubetypes.NUMANodeResources{ - 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI)}, - 2: nodeResources{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI)}, + []kubeletconfig.MemoryReservation{ + { + NumaNode: 0, + Limits: v1.ResourceList{ + v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), + }, + }, + { + NumaNode: 2, + Limits: v1.ResourceList{ + v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI), + }, + }, }, fmt.Sprintf(msgNotEqual, v1.ResourceMemory), }, @@ -218,12 +242,22 @@ func TestValidateReservedMemory(t *testing.T) { hugepages2M: *resource.NewQuantity(77, resource.DecimalSI), hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, machineInfo, - kubetypes.NUMANodeResources{ - 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), - hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), - hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, - 1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI), - hugepages2M: *resource.NewQuantity(7, resource.DecimalSI)}, + []kubeletconfig.MemoryReservation{ + { + NumaNode: 0, + Limits: v1.ResourceList{ + v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), + hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), + hugepages1G: *resource.NewQuantity(13, resource.DecimalSI), + }, + }, + { + NumaNode: 1, + Limits: v1.ResourceList{ + v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI), + hugepages2M: *resource.NewQuantity(7, resource.DecimalSI), + }, + }, }, "", }, @@ -233,12 +267,22 @@ func TestValidateReservedMemory(t *testing.T) { hugepages2M: *resource.NewQuantity(14, resource.DecimalSI), hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, machineInfo, - kubetypes.NUMANodeResources{ - 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), - hugepages2M: 
*resource.NewQuantity(70, resource.DecimalSI), - hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, - 1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI), - hugepages2M: *resource.NewQuantity(7, resource.DecimalSI)}, + []kubeletconfig.MemoryReservation{ + { + NumaNode: 0, + Limits: v1.ResourceList{ + v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), + hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), + hugepages1G: *resource.NewQuantity(13, resource.DecimalSI), + }, + }, + { + NumaNode: 1, + Limits: v1.ResourceList{ + v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI), + hugepages2M: *resource.NewQuantity(7, resource.DecimalSI), + }, + }, }, fmt.Sprintf(msgNotEqual, hugepages2M), @@ -266,13 +310,13 @@ func TestConvertPreReserved(t *testing.T) { testCases := []struct { description string - systemReserved kubetypes.NUMANodeResources + systemReserved []kubeletconfig.MemoryReservation systemReservedExpected systemReservedMemory expectedError string }{ { "Empty", - kubetypes.NUMANodeResources{}, + []kubeletconfig.MemoryReservation{}, systemReservedMemory{ 0: map[v1.ResourceName]uint64{}, 1: map[v1.ResourceName]uint64{}, @@ -281,10 +325,15 @@ func TestConvertPreReserved(t *testing.T) { }, { "Single NUMA node is reserved", - kubetypes.NUMANodeResources{ - 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), - hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), - hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, + []kubeletconfig.MemoryReservation{ + { + NumaNode: 0, + Limits: v1.ResourceList{ + v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), + hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), + hugepages1G: *resource.NewQuantity(13, resource.DecimalSI), + }, + }, }, systemReservedMemory{ 0: map[v1.ResourceName]uint64{ @@ -298,12 +347,22 @@ func TestConvertPreReserved(t *testing.T) { }, { "Both NUMA nodes are reserved", - kubetypes.NUMANodeResources{ - 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), - hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), - hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, - 1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI), - hugepages2M: *resource.NewQuantity(7, resource.DecimalSI)}, + []kubeletconfig.MemoryReservation{ + { + NumaNode: 0, + Limits: v1.ResourceList{ + v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), + hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), + hugepages1G: *resource.NewQuantity(13, resource.DecimalSI), + }, + }, + { + NumaNode: 1, + Limits: v1.ResourceList{ + v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI), + hugepages2M: *resource.NewQuantity(7, resource.DecimalSI), + }, + }, }, systemReservedMemory{ 0: map[v1.ResourceName]uint64{ @@ -336,7 +395,7 @@ func TestGetSystemReservedMemory(t *testing.T) { { description: "Should return empty map when reservation is not done", nodeAllocatableReservation: v1.ResourceList{}, - systemReservedMemory: kubetypes.NUMANodeResources{}, + systemReservedMemory: []kubeletconfig.MemoryReservation{}, expectedReserved: systemReservedMemory{ 0: {}, 1: {}, @@ -347,8 +406,13 @@ func TestGetSystemReservedMemory(t *testing.T) { { description: "Should return error when Allocatable reservation is not equal pre reserved memory", nodeAllocatableReservation: v1.ResourceList{}, - systemReservedMemory: kubetypes.NUMANodeResources{ - 0: 
nodeResources{v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI)}, + systemReservedMemory: []kubeletconfig.MemoryReservation{ + { + NumaNode: 0, + Limits: v1.ResourceList{ + v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI), + }, + }, }, expectedReserved: nil, expectedError: fmt.Errorf("the total amount of memory of type \"memory\" is not equal to the value determined by Node Allocatable feature"), @@ -357,9 +421,19 @@ func TestGetSystemReservedMemory(t *testing.T) { { description: "Reserved should be equal to systemReservedMemory", nodeAllocatableReservation: v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(2*gb, resource.BinarySI)}, - systemReservedMemory: kubetypes.NUMANodeResources{ - 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI)}, - 1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI)}, + systemReservedMemory: []kubeletconfig.MemoryReservation{ + { + NumaNode: 0, + Limits: v1.ResourceList{ + v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI), + }, + }, + { + NumaNode: 1, + Limits: v1.ResourceList{ + v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI), + }, + }, }, expectedReserved: systemReservedMemory{ 0: map[v1.ResourceName]uint64{ @@ -2063,9 +2137,15 @@ func TestNewManager(t *testing.T) { policyName: policyTypeStatic, machineInfo: machineInfo, nodeAllocatableReservation: v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(2*gb, resource.BinarySI)}, - systemReservedMemory: kubetypes.NUMANodeResources{ - 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI)}, - 1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI)}, + systemReservedMemory: []kubeletconfig.MemoryReservation{ + { + NumaNode: 0, + Limits: v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI)}, + }, + { + NumaNode: 1, + Limits: v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI)}, + }, }, affinity: topologymanager.NewFakeManager(), expectedError: nil, @@ -2076,9 +2156,19 @@ func TestNewManager(t *testing.T) { policyName: policyTypeStatic, machineInfo: machineInfo, nodeAllocatableReservation: v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(2*gb, resource.BinarySI)}, - systemReservedMemory: kubetypes.NUMANodeResources{ - 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI)}, - 1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(2*gb, resource.BinarySI)}, + systemReservedMemory: []kubeletconfig.MemoryReservation{ + { + NumaNode: 0, + Limits: v1.ResourceList{ + v1.ResourceMemory: *resource.NewQuantity(gb, resource.BinarySI), + }, + }, + { + NumaNode: 1, + Limits: v1.ResourceList{ + v1.ResourceMemory: *resource.NewQuantity(2*gb, resource.BinarySI), + }, + }, }, affinity: topologymanager.NewFakeManager(), expectedError: fmt.Errorf("the total amount of memory of type %q is not equal to the value determined by Node Allocatable feature", v1.ResourceMemory), @@ -2089,7 +2179,7 @@ func TestNewManager(t *testing.T) { policyName: policyTypeStatic, machineInfo: machineInfo, nodeAllocatableReservation: v1.ResourceList{}, - systemReservedMemory: kubetypes.NUMANodeResources{}, + systemReservedMemory: []kubeletconfig.MemoryReservation{}, affinity: topologymanager.NewFakeManager(), expectedError: fmt.Errorf("[memorymanager] you should specify the system reserved memory"), expectedReserved: expectedReserved, @@ -2099,7 +2189,7 @@ func TestNewManager(t *testing.T) { 
 			policyName:                 "fake",
 			machineInfo:                machineInfo,
 			nodeAllocatableReservation: v1.ResourceList{},
-			systemReservedMemory:       kubetypes.NUMANodeResources{},
+			systemReservedMemory:       []kubeletconfig.MemoryReservation{},
 			affinity:                   topologymanager.NewFakeManager(),
 			expectedError:              fmt.Errorf("unknown policy: \"fake\""),
 			expectedReserved:           expectedReserved,
@@ -2109,7 +2199,7 @@ func TestNewManager(t *testing.T) {
 			policyName:                 policyTypeNone,
 			machineInfo:                machineInfo,
 			nodeAllocatableReservation: v1.ResourceList{},
-			systemReservedMemory:       kubetypes.NUMANodeResources{},
+			systemReservedMemory:       []kubeletconfig.MemoryReservation{},
 			affinity:                   topologymanager.NewFakeManager(),
 			expectedError:              nil,
 			expectedReserved:           expectedReserved,
diff --git a/pkg/kubelet/types/BUILD b/pkg/kubelet/types/BUILD
index 06357196f2b8..2f4e1abe1918 100644
--- a/pkg/kubelet/types/BUILD
+++ b/pkg/kubelet/types/BUILD
@@ -20,7 +20,6 @@ go_library(
     deps = [
         "//pkg/apis/scheduling:go_default_library",
         "//staging/src/k8s.io/api/core/v1:go_default_library",
-        "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
     ],
diff --git a/pkg/kubelet/types/types.go b/pkg/kubelet/types/types.go
index 8dc13bcc7913..2ed7f1c991e5 100644
--- a/pkg/kubelet/types/types.go
+++ b/pkg/kubelet/types/types.go
@@ -21,7 +21,6 @@ import (
 	"time"
 
 	v1 "k8s.io/api/core/v1"
-	"k8s.io/apimachinery/pkg/api/resource"
 	"k8s.io/apimachinery/pkg/types"
 )
 
@@ -114,6 +113,3 @@ type ResolvedPodUID types.UID
 
 // MirrorPodUID is a pod UID for a mirror pod.
 type MirrorPodUID types.UID
-
-// NUMANodeResources is a set of (resource name, quantity) pairs for each NUMA node.
-type NUMANodeResources map[int]map[v1.ResourceName]resource.Quantity
diff --git a/test/e2e_node/memory_manager_test.go b/test/e2e_node/memory_manager_test.go
index fb02b8aed463..d25565f80a83 100644
--- a/test/e2e_node/memory_manager_test.go
+++ b/test/e2e_node/memory_manager_test.go
@@ -46,9 +46,6 @@ import (
 const (
 	evictionHardMemory     = "memory.available"
 	memoryManagerStateFile = "/var/lib/kubelet/memory_manager_state"
-	reservedLimit          = "limit"
-	reservedNUMANode       = "numa-node"
-	reservedType           = "type"
 	resourceMemory         = "memory"
 	staticPolicy           = "static"
 	nonePolicy             = "none"
 )
 
 // Helper for makeMemoryManagerPod().
@@ -159,7 +156,7 @@ func getMemoryManagerState() (*state.MemoryManagerCheckpoint, error) {
 
 type kubeletParams struct {
 	memoryManagerFeatureGate bool
 	memoryManagerPolicy      string
-	systemReservedMemory     []map[string]string
+	systemReservedMemory     []kubeletconfig.MemoryReservation
 	systemReserved           map[string]string
 	kubeReserved             map[string]string
 	evictionHard             map[string]string
@@ -200,10 +197,10 @@ func getUpdatedKubeletConfig(oldCfg *kubeletconfig.KubeletConfiguration, params
 
 	// update reserved memory
 	if newCfg.ReservedMemory == nil {
-		newCfg.ReservedMemory = []map[string]string{}
+		newCfg.ReservedMemory = []kubeletconfig.MemoryReservation{}
 	}
-	for _, p := range params.systemReservedMemory {
-		newCfg.ReservedMemory = append(newCfg.ReservedMemory, p)
+	for _, memoryReservation := range params.systemReservedMemory {
+		newCfg.ReservedMemory = append(newCfg.ReservedMemory, memoryReservation)
 	}
 
 	return newCfg
@@ -259,10 +256,17 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager][NodeAlphaFeature:MemoryManager]", func() {
 	)
 
 	f := framework.NewDefaultFramework("memory-manager-test")
+
+	memoryQuantity := resource.MustParse("1100Mi")
 	defaultKubeParams := &kubeletParams{
 		memoryManagerFeatureGate: true,
-		systemReservedMemory: []map[string]string{
-			{reservedNUMANode: "0", reservedType: resourceMemory, reservedLimit: "1100Mi"},
+		systemReservedMemory: []kubeletconfig.MemoryReservation{
+			{
+				NumaNode: 0,
+				Limits: v1.ResourceList{
+					resourceMemory: memoryQuantity,
+				},
+			},
 		},
 		systemReserved: map[string]string{resourceMemory: "500Mi"},
 		kubeReserved:   map[string]string{resourceMemory: "500Mi"},

From 932134034ce5f71077ffa1124930b858902eca86 Mon Sep 17 00:00:00 2001
From: Artyom Lukianov
Date: Wed, 18 Nov 2020 01:25:13 +0200
Subject: [PATCH 30/31] memory manager: update API constants to use the
 CamelCase format

Signed-off-by: Artyom Lukianov
---
 cmd/kubelet/app/options/options.go                          | 2 +-
 .../scheme/testdata/KubeletConfiguration/after/v1beta1.yaml | 2 +-
 .../KubeletConfiguration/roundtrip/default/v1beta1.yaml     | 2 +-
 pkg/kubelet/cm/memorymanager/policy_none.go                 | 2 +-
 pkg/kubelet/cm/memorymanager/policy_static.go               | 2 +-
 staging/src/k8s.io/kubelet/config/v1beta1/types.go          | 6 +++---
 test/e2e_node/memory_manager_test.go                        | 4 ++--
 7 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/cmd/kubelet/app/options/options.go b/cmd/kubelet/app/options/options.go
index 0caa42c98307..7dc63dbe7432 100644
--- a/cmd/kubelet/app/options/options.go
+++ b/cmd/kubelet/app/options/options.go
@@ -552,7 +552,7 @@ Runtime log sanitization may introduce significant computation overhead and ther
 	fs.BoolVar(&c.KernelMemcgNotification, "experimental-kernel-memcg-notification", c.KernelMemcgNotification, "Use kernelMemcgNotification configuration, this flag will be removed in 1.23.")
 
 	// Memory Manager Flags
-	fs.StringVar(&c.MemoryManagerPolicy, "memory-manager-policy", c.MemoryManagerPolicy, "Memory Manager policy to use. Possible values: 'none', 'static'. Default: 'none'")
+	fs.StringVar(&c.MemoryManagerPolicy, "memory-manager-policy", c.MemoryManagerPolicy, "Memory Manager policy to use. Possible values: 'None', 'Static'. Default: 'None'")
 	// TODO: once documentation link is available, replace KEP link with the documentation one.
 	fs.Var(&utilflag.ReservedMemoryVar{Value: &c.ReservedMemory}, "reserved-memory", "A comma separated list of memory reservations for NUMA nodes. (e.g. --reserved-memory 0:memory=1Gi,hugepages-1M=2Gi --reserved-memory 1:memory=2Gi). The total sum for each memory type should be equal to the sum of kube-reserved, system-reserved and eviction-threshold. See more details under https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/1769-memory-manager#reserved-memory-flag")
 }
diff --git a/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/after/v1beta1.yaml b/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/after/v1beta1.yaml
index a945c692e5e2..4ebbd7267916 100644
--- a/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/after/v1beta1.yaml
+++ b/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/after/v1beta1.yaml
@@ -55,7 +55,7 @@ logging:
 makeIPTablesUtilChains: true
 maxOpenFiles: 1000000
 maxPods: 110
-memoryManagerPolicy: none
+memoryManagerPolicy: None
 nodeLeaseDurationSeconds: 40
 nodeStatusMaxImages: 50
 nodeStatusReportFrequency: 5m0s
diff --git a/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/roundtrip/default/v1beta1.yaml b/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/roundtrip/default/v1beta1.yaml
index a945c692e5e2..4ebbd7267916 100644
--- a/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/roundtrip/default/v1beta1.yaml
+++ b/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/roundtrip/default/v1beta1.yaml
@@ -55,7 +55,7 @@ logging:
 makeIPTablesUtilChains: true
 maxOpenFiles: 1000000
 maxPods: 110
-memoryManagerPolicy: none
+memoryManagerPolicy: None
 nodeLeaseDurationSeconds: 40
 nodeStatusMaxImages: 50
 nodeStatusReportFrequency: 5m0s
diff --git a/pkg/kubelet/cm/memorymanager/policy_none.go b/pkg/kubelet/cm/memorymanager/policy_none.go
index a8b6b778520a..8df7bdb0dad7 100644
--- a/pkg/kubelet/cm/memorymanager/policy_none.go
+++ b/pkg/kubelet/cm/memorymanager/policy_none.go
@@ -22,7 +22,7 @@ import (
 	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
 )
 
-const policyTypeNone policyType = "none"
+const policyTypeNone policyType = "None"
 
 // none is implementation of the policy interface for the none policy, using none
 // policy is the same as disable memory management
diff --git a/pkg/kubelet/cm/memorymanager/policy_static.go b/pkg/kubelet/cm/memorymanager/policy_static.go
index 2da2ecd4c6ba..f6ad0b5fcc25 100644
--- a/pkg/kubelet/cm/memorymanager/policy_static.go
+++ b/pkg/kubelet/cm/memorymanager/policy_static.go
@@ -33,7 +33,7 @@ import (
 	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
 )
 
-const policyTypeStatic policyType = "static"
+const policyTypeStatic policyType = "Static"
 
 type systemReservedMemory map[int]map[v1.ResourceName]uint64
 
diff --git a/staging/src/k8s.io/kubelet/config/v1beta1/types.go b/staging/src/k8s.io/kubelet/config/v1beta1/types.go
index 4ae08f27c4b7..1c009b96eaa3 100644
--- a/staging/src/k8s.io/kubelet/config/v1beta1/types.go
+++ b/staging/src/k8s.io/kubelet/config/v1beta1/types.go
@@ -75,11 +75,11 @@ const (
 	PodTopologyManagerScope = "pod"
 	// NoneMemoryManagerPolicy is a memory manager none policy, under the none policy
 	// the memory manager will not pin containers memory of guaranteed pods
-	NoneMemoryManagerPolicy = "none"
+	NoneMemoryManagerPolicy = "None"
 	// StaticMemoryManagerPolicy is a memory manager static policy, under the static policy
 	// the memory manager will try to pin containers memory of guaranteed pods to the smallest
 	// possible sub-set of NUMA nodes
-	StaticMemoryManagerPolicy = "static"
+	StaticMemoryManagerPolicy = "Static"
 )
 
 // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
@@ -430,7 +430,7 @@ type KubeletConfiguration struct {
 	// Requires the CPUManager feature gate to be enabled.
 	// Dynamic Kubelet Config (beta): This field should not be updated without a full node
 	// reboot. It is safest to keep this value the same as the local config.
-	// Default: "none"
+	// Default: "None"
 	// +optional
 	CPUManagerPolicy string `json:"cpuManagerPolicy,omitempty"`
 	// CPU Manager reconciliation period.
diff --git a/test/e2e_node/memory_manager_test.go b/test/e2e_node/memory_manager_test.go
index d25565f80a83..c92a16f7eee8 100644
--- a/test/e2e_node/memory_manager_test.go
+++ b/test/e2e_node/memory_manager_test.go
@@ -47,8 +47,8 @@ const (
 	evictionHardMemory     = "memory.available"
 	memoryManagerStateFile = "/var/lib/kubelet/memory_manager_state"
 	resourceMemory         = "memory"
-	staticPolicy           = "static"
-	nonePolicy             = "none"
+	staticPolicy           = "Static"
+	nonePolicy             = "None"
 )
 
 // Helper for makeMemoryManagerPod().

From 102124464a994946e151c976775cf751423b14f7 Mon Sep 17 00:00:00 2001
From: Artyom Lukianov
Date: Mon, 14 Dec 2020 20:28:11 +0200
Subject: [PATCH 31/31] memory manager: improve the reserved memory validation
 logic

We will have two layers of validation:

- the first layer of the validation logic will be implemented under the
  `ValidateKubeletConfiguration` method
- the second layer, which requires knowledge of the machine topology and
  node allocatable resources, will be implemented under the memory manager

Signed-off-by: Artyom Lukianov
---
 pkg/kubelet/apis/config/validation/BUILD          |  10 +-
 .../apis/config/validation/validation.go          |   2 +
 .../validation/validation_reserved_memory.go      |  64 ++++++++++
 .../validation_reserved_memory_test.go            | 120 ++++++++++++++++++
 .../cm/memorymanager/memory_manager.go            |  18 +--
 .../cm/memorymanager/memory_manager_test.go       |  18 +--
 6 files changed, 213 insertions(+), 19 deletions(-)
 create mode 100644 pkg/kubelet/apis/config/validation/validation_reserved_memory.go
 create mode 100644 pkg/kubelet/apis/config/validation/validation_reserved_memory_test.go

diff --git a/pkg/kubelet/apis/config/validation/BUILD b/pkg/kubelet/apis/config/validation/BUILD
index 25ad727a0a04..55d2875d903f 100644
--- a/pkg/kubelet/apis/config/validation/BUILD
+++ b/pkg/kubelet/apis/config/validation/BUILD
@@ -11,14 +11,17 @@ go_library(
     srcs = [
         "validation.go",
         "validation_others.go",
+        "validation_reserved_memory.go",
         "validation_windows.go",
     ],
     importpath = "k8s.io/kubernetes/pkg/kubelet/apis/config/validation",
     deps = [
+        "//pkg/apis/core/v1/helper:go_default_library",
         "//pkg/features:go_default_library",
         "//pkg/kubelet/apis/config:go_default_library",
         "//pkg/kubelet/cm/cpuset:go_default_library",
         "//pkg/kubelet/types:go_default_library",
+        "//staging/src/k8s.io/api/core/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/errors:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/validation:go_default_library",
@@ -43,10 +46,15 @@ filegroup(
 
 go_test(
     name = "go_default_test",
-    srcs = ["validation_test.go"],
+    srcs = [
+        "validation_reserved_memory_test.go",
+        "validation_test.go",
+    ],
     embed = [":go_default_library"],
     deps = [
         "//pkg/kubelet/apis/config:go_default_library",
+        "//staging/src/k8s.io/api/core/v1:go_default_library",
+        "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/errors:go_default_library",
     ],
diff --git a/pkg/kubelet/apis/config/validation/validation.go b/pkg/kubelet/apis/config/validation/validation.go
index 56069cae10bf..69b08a8229d4 100644
--- a/pkg/kubelet/apis/config/validation/validation.go
+++ b/pkg/kubelet/apis/config/validation/validation.go
@@ -193,6 +193,8 @@ func ValidateKubeletConfiguration(kc *kubeletconfig.KubeletConfiguration) error
 		}
 	}
 
+	allErrors = append(allErrors, validateReservedMemoryConfiguration(kc)...)
+
 	if err := validateKubeletOSConfiguration(kc); err != nil {
 		allErrors = append(allErrors, err)
 	}
diff --git a/pkg/kubelet/apis/config/validation/validation_reserved_memory.go b/pkg/kubelet/apis/config/validation/validation_reserved_memory.go
new file mode 100644
index 000000000000..ba4d9a289ffb
--- /dev/null
+++ b/pkg/kubelet/apis/config/validation/validation_reserved_memory.go
@@ -0,0 +1,64 @@
+/*
+Copyright 2020 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package validation
+
+import (
+	"fmt"
+
+	v1 "k8s.io/api/core/v1"
+	corev1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
+	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
+)
+
+// validateReservedMemoryConfiguration validates the reserved memory configuration and returns errors if it is invalid.
+func validateReservedMemoryConfiguration(kc *kubeletconfig.KubeletConfiguration) []error {
+	if len(kc.ReservedMemory) == 0 {
+		return nil
+	}
+
+	var errors []error
+
+	numaTypeDuplicates := map[int32]map[v1.ResourceName]bool{}
+	for _, reservedMemory := range kc.ReservedMemory {
+		numaNode := reservedMemory.NumaNode
+		if _, ok := numaTypeDuplicates[numaNode]; !ok {
+			numaTypeDuplicates[numaNode] = map[v1.ResourceName]bool{}
+		}
+
+		for resourceName, q := range reservedMemory.Limits {
+			if !reservedMemorySupportedLimit(resourceName) {
+				errors = append(errors, fmt.Errorf("the limit type %q for NUMA node %d is not supported, only %v is accepted", resourceName, numaNode, []v1.ResourceName{v1.ResourceMemory, v1.ResourceHugePagesPrefix + ""}))
+			}
+
+			// validates that the limit has a non-zero value
+			if q.IsZero() {
+				errors = append(errors, fmt.Errorf("reserved memory may not be zero for NUMA node %d and resource %q", numaNode, resourceName))
+			}
+
+			// validates that no duplication for NUMA node and limit type occurred
+			if _, ok := numaTypeDuplicates[numaNode][resourceName]; ok {
+				errors = append(errors, fmt.Errorf("the reserved memory has a duplicate value for NUMA node %d and resource %q", numaNode, resourceName))
+			}
+			numaTypeDuplicates[numaNode][resourceName] = true
+		}
+	}
+	return errors
+}
+
+func reservedMemorySupportedLimit(resourceName v1.ResourceName) bool {
+	return corev1helper.IsHugePageResourceName(resourceName) || resourceName == v1.ResourceMemory
+}
diff --git a/pkg/kubelet/apis/config/validation/validation_reserved_memory_test.go b/pkg/kubelet/apis/config/validation/validation_reserved_memory_test.go
new file mode 100644
index 000000000000..1910ffaa1ebc
--- /dev/null
+++ b/pkg/kubelet/apis/config/validation/validation_reserved_memory_test.go
@@ -0,0 +1,120 @@
+/*
+Copyright 2020 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package validation
+
+import (
+	"fmt"
+	"testing"
+
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
+)
+
+func TestValidateReservedMemoryConfiguration(t *testing.T) {
+	testCases := []struct {
+		description          string
+		kubeletConfiguration *kubeletconfig.KubeletConfiguration
+		expectedError        error
+	}{
+		{
+			description:          "The kubelet configuration does not have reserved memory parameter",
+			kubeletConfiguration: &kubeletconfig.KubeletConfiguration{},
+			expectedError:        nil,
+		},
+		{
+			description: "The kubelet configuration has valid reserved memory parameter",
+			kubeletConfiguration: &kubeletconfig.KubeletConfiguration{
+				ReservedMemory: []kubeletconfig.MemoryReservation{
+					{
+						NumaNode: 0,
+						Limits: v1.ResourceList{
+							v1.ResourceMemory: *resource.NewQuantity(128, resource.DecimalSI),
+						},
+					},
+				},
+			},
+			expectedError: nil,
+		},
+		{
+			description: "The reserved memory has duplications for the NUMA node and limit type",
+			kubeletConfiguration: &kubeletconfig.KubeletConfiguration{
+				ReservedMemory: []kubeletconfig.MemoryReservation{
+					{
+						NumaNode: 0,
+						Limits: v1.ResourceList{
+							v1.ResourceMemory: *resource.NewQuantity(128, resource.DecimalSI),
+						},
+					},
+					{
+						NumaNode: 0,
+						Limits: v1.ResourceList{
+							v1.ResourceMemory: *resource.NewQuantity(64, resource.DecimalSI),
+						},
+					},
+				},
+			},
+			expectedError: fmt.Errorf("the reserved memory has a duplicate value for NUMA node %d and resource %q", 0, v1.ResourceMemory),
+		},
+		{
+			description: "The reserved memory has unsupported limit type",
+			kubeletConfiguration: &kubeletconfig.KubeletConfiguration{
+				ReservedMemory: []kubeletconfig.MemoryReservation{
+					{
+						NumaNode: 0,
+						Limits: v1.ResourceList{
+							"blabla": *resource.NewQuantity(128, resource.DecimalSI),
+						},
+					},
+				},
+			},
+			expectedError: fmt.Errorf("the limit type %q for NUMA node %d is not supported, only [memory hugepages-] is accepted", "blabla", 0),
+		},
+		{
+			description: "The reserved memory has limit type with zero value",
+			kubeletConfiguration: &kubeletconfig.KubeletConfiguration{
+				ReservedMemory: []kubeletconfig.MemoryReservation{
+					{
+						NumaNode: 0,
+						Limits: v1.ResourceList{
+							v1.ResourceMemory: *resource.NewQuantity(0, resource.DecimalSI),
+						},
+					},
+				},
+			},
+			expectedError: fmt.Errorf("reserved memory may not be zero for NUMA node %d and resource %q", 0, v1.ResourceMemory),
+		},
+	}
+
+	for _, testCase := range testCases {
+		errors := validateReservedMemoryConfiguration(testCase.kubeletConfiguration)
+
+		if len(errors) != 0 && testCase.expectedError == nil {
+			t.Errorf("expected no errors, got %v", errors)
+		}
+
+		if testCase.expectedError != nil {
+			if len(errors) == 0 {
+				t.Errorf("expected error %v, got none", testCase.expectedError)
+				continue
+			}
+			if errors[0].Error() != testCase.expectedError.Error() {
+				t.Errorf("expected error %v, got %v", testCase.expectedError, errors[0])
+			}
+		}
+	}
+}
diff --git a/pkg/kubelet/cm/memorymanager/memory_manager.go b/pkg/kubelet/cm/memorymanager/memory_manager.go
index 7840d6d0b25f..1b00008bfa45 100644
--- a/pkg/kubelet/cm/memorymanager/memory_manager.go
+++ b/pkg/kubelet/cm/memorymanager/memory_manager.go
@@ -321,7 +321,7 @@ func (m *manager) policyRemoveContainerByRef(podUID string, containerName string
 	return err
 }
 
-func getTotalMemoryTypeReserved(machineInfo *cadvisorapi.MachineInfo, reservedMemory []kubeletconfig.MemoryReservation) map[v1.ResourceName]resource.Quantity {
+func getTotalMemoryTypeReserved(machineInfo *cadvisorapi.MachineInfo, reservedMemory []kubeletconfig.MemoryReservation) (map[v1.ResourceName]resource.Quantity, error) {
 	totalMemoryType := map[v1.ResourceName]resource.Quantity{}
 
 	numaNodes := map[int]bool{}
@@ -331,8 +331,7 @@ func getTotalMemoryTypeReserved(machineInfo *cadvisorapi.MachineInfo, reservedMe
 
 	for _, reservation := range reservedMemory {
 		if !numaNodes[int(reservation.NumaNode)] {
-			klog.Warningf("The NUMA node %d specified under --reserved-memory does not exist on the machine", reservation.NumaNode)
-			continue
+			return nil, fmt.Errorf("the reserved memory configuration references a NUMA node %d that does not exist on this machine", reservation.NumaNode)
 		}
 
 		for resourceName, q := range reservation.Limits {
@@ -343,19 +342,20 @@ func getTotalMemoryTypeReserved(machineInfo *cadvisorapi.MachineInfo, reservedMe
 		}
 	}
 
-	return totalMemoryType
+	return totalMemoryType, nil
 }
 
 func validateReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, reservedMemory []kubeletconfig.MemoryReservation) error {
-	totalMemoryType := getTotalMemoryTypeReserved(machineInfo, reservedMemory)
+	totalMemoryType, err := getTotalMemoryTypeReserved(machineInfo, reservedMemory)
+	if err != nil {
+		return err
+	}
 
 	commonMemoryTypeSet := make(map[v1.ResourceName]bool)
 	for resourceType := range totalMemoryType {
-		if !(corev1helper.IsHugePageResourceName(resourceType) || resourceType == v1.ResourceMemory) {
-			continue
-		}
 		commonMemoryTypeSet[resourceType] = true
 	}
+
 	for resourceType := range nodeAllocatableReservation {
 		if !(corev1helper.IsHugePageResourceName(resourceType) || resourceType == v1.ResourceMemory) {
 			continue
@@ -375,7 +375,7 @@ func validateReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatabl
 	}
 
 	if !(*nodeAllocatableMemory).Equal(*reservedMemory) {
-		return fmt.Errorf("the total amount of memory of type \"%s\" is not equal to the value determined by Node Allocatable feature", resourceType)
+		return fmt.Errorf("the total amount %q of type %q is not equal to the value %q determined by Node Allocatable feature", reservedMemory.String(), resourceType, nodeAllocatableMemory.String())
 	}
 }
diff --git a/pkg/kubelet/cm/memorymanager/memory_manager_test.go b/pkg/kubelet/cm/memorymanager/memory_manager_test.go
index e1d0f20d86d4..ff2b3f811c9d 100644
--- a/pkg/kubelet/cm/memorymanager/memory_manager_test.go
+++ b/pkg/kubelet/cm/memorymanager/memory_manager_test.go
@@ -152,7 +152,7 @@ func TestValidateReservedMemory(t *testing.T) {
 			{Id: 1},
 		},
 	}
-	const msgNotEqual = "the total amount of memory of type \"%s\" is not equal to the value determined by Node Allocatable feature"
+	const msgNotEqual = "the total amount %q of type %q is not equal to the value %q determined by Node Allocatable feature"
 	testCases := []struct {
 		description                string
 		nodeAllocatableReservation v1.ResourceList
@@ -193,14 +193,14 @@ func TestValidateReservedMemory(t *testing.T) {
 					},
 				},
 			},
-			fmt.Sprintf(msgNotEqual, v1.ResourceMemory),
+			fmt.Sprintf(msgNotEqual, "12", v1.ResourceMemory, "0"),
 		},
 		{
 			"Node Allocatable set, reserved not set",
 			v1.ResourceList{hugepages2M: *resource.NewQuantity(5, resource.DecimalSI)},
 			machineInfo,
 			[]kubeletconfig.MemoryReservation{},
-			fmt.Sprintf(msgNotEqual, hugepages2M),
+			fmt.Sprintf(msgNotEqual, "0", hugepages2M, "5"),
 		},
 		{
 			"Reserved not equal to Node Allocatable",
@@ -214,7 +214,7 @@ func TestValidateReservedMemory(t *testing.T) {
 					},
 				},
 			},
-			fmt.Sprintf(msgNotEqual, v1.ResourceMemory),
+			fmt.Sprintf(msgNotEqual, "12", v1.ResourceMemory, "5"),
 		},
 		{
 			"Reserved contains the NUMA node that does not exist under the machine",
@@ -234,7 +234,7 @@ func TestValidateReservedMemory(t *testing.T) {
 					},
 				},
 			},
-			fmt.Sprintf(msgNotEqual, v1.ResourceMemory),
+			"the reserved memory configuration references a NUMA node 2 that does not exist on this machine",
 		},
 		{
 			"Reserved total equal to Node Allocatable",
@@ -285,7 +285,7 @@ func TestValidateReservedMemory(t *testing.T) {
 			},
 
-			fmt.Sprintf(msgNotEqual, hugepages2M),
+			fmt.Sprintf(msgNotEqual, "77", hugepages2M, "14"),
 		},
 	}
 
@@ -404,7 +404,7 @@ func TestGetSystemReservedMemory(t *testing.T) {
 			machineInfo:                machineInfo,
 		},
 		{
-			description:                "Should return error when Allocatable reservation is not equal pre reserved memory",
+			description:                "Should return error when Allocatable reservation is not equal to the reserved memory",
 			nodeAllocatableReservation: v1.ResourceList{},
 			systemReservedMemory: []kubeletconfig.MemoryReservation{
 				{
@@ -415,7 +415,7 @@ func TestGetSystemReservedMemory(t *testing.T) {
 				},
 			},
 			expectedReserved: nil,
-			expectedError:    fmt.Errorf("the total amount of memory of type \"memory\" is not equal to the value determined by Node Allocatable feature"),
+			expectedError:    fmt.Errorf("the total amount \"1Gi\" of type \"memory\" is not equal to the value \"0\" determined by Node Allocatable feature"),
 			machineInfo:      machineInfo,
 		},
 		{
@@ -2171,7 +2171,7 @@ func TestNewManager(t *testing.T) {
 				},
 			},
 			affinity:         topologymanager.NewFakeManager(),
-			expectedError:    fmt.Errorf("the total amount of memory of type %q is not equal to the value determined by Node Allocatable feature", v1.ResourceMemory),
+			expectedError:    fmt.Errorf("the total amount \"3Gi\" of type %q is not equal to the value \"2Gi\" determined by Node Allocatable feature", v1.ResourceMemory),
 			expectedReserved: expectedReserved,
 		},
 		{
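
A note for readers following the series: the invariant that validateReservedMemory enforces after this last patch can be summarized in a short, self-contained sketch. The program below is illustrative and not taken from the patches; it only assumes the kubeletconfig.MemoryReservation type introduced above, and it mirrors the per-resource totaling of getTotalMemoryTypeReserved (minus the NUMA-node existence check): for every resource type, the per-NUMA-node reservations must sum to exactly the value derived from the Node Allocatable feature (kube-reserved + system-reserved + hard eviction threshold).

// reservedmemorysketch demonstrates the equality check performed by the
// memory manager; values are illustrative, not a real node configuration.
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
)

func main() {
	// 1Gi of regular memory reserved on each of two NUMA nodes.
	reservations := []kubeletconfig.MemoryReservation{
		{NumaNode: 0, Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi")}},
		{NumaNode: 1, Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi")}},
	}

	// Total the reservations per resource type across all NUMA nodes.
	totals := v1.ResourceList{}
	for _, r := range reservations {
		for name, q := range r.Limits {
			total := totals[name]
			total.Add(q)
			totals[name] = total
		}
	}

	// The reservation the Node Allocatable feature would determine for the
	// node (kube-reserved + system-reserved + hard eviction threshold).
	nodeAllocatable := v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi")}

	for name, allocatable := range nodeAllocatable {
		reserved := totals[name]
		// Prints: memory: reserved=2Gi allocatable=2Gi equal=true
		fmt.Printf("%s: reserved=%s allocatable=%s equal=%t\n",
			name, reserved.String(), allocatable.String(), reserved.Equal(allocatable))
	}
}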
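For completeness, a hypothetical KubeletConfiguration fragment that would satisfy both validation layers might look as follows. The field names follow the v1beta1 API from this series, and the values are illustrative only; the arithmetic mirrors the e2e defaults above (500Mi kube-reserved + 500Mi system-reserved + an assumed 100Mi hard eviction threshold = 1100Mi assigned to NUMA node 0):

apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
featureGates:
  MemoryManager: true
memoryManagerPolicy: Static
systemReserved:
  memory: 500Mi
kubeReserved:
  memory: 500Mi
evictionHard:
  memory.available: 100Mi
# 500Mi + 500Mi + 100Mi = 1100Mi, all assigned to NUMA node 0
reservedMemory:
- numaNode: 0
  limits:
    memory: 1100Mi
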