Fix initContainersReusableMemory delete bug in MemoryManager #104788

Merged
merged 1 commit on Oct 1, 2021
2 changes: 1 addition & 1 deletion pkg/kubelet/cm/memorymanager/policy_static.go
@@ -837,7 +837,7 @@ func (p *staticPolicy) updatePodReusableMemory(pod *v1.Pod, container *v1.Contai
// If pod entries to m.initContainersReusableMemory other than the current pod exist, delete them.
for uid := range p.initContainersReusableMemory {
if podUID != uid {
delete(p.initContainersReusableMemory, podUID)
delete(p.initContainersReusableMemory, uid)
Contributor

/ok-to-test
Compared with the implementation of cpuManager, it should be fixed here.

As you can see, it uses a different variable name inside the loop:

for podUID := range p.cpusToReuse {

So the behavior here is OK; we want to delete a pod from the reusable pods once we are working on a pod with a different UID.
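For illustration, here is a minimal self-contained sketch of that pattern (hypothetical names and a simplified map type, not the actual cpuManager code). Because the loop variable itself is named podUID, deleting by that name removes the entry currently being iterated, which the if-check guarantees belongs to a different pod:

```go
package main

import "fmt"

// pruneOtherPods mirrors the cpuManager-style loop quoted above: the loop variable
// is named podUID, so delete(cpusToReuse, podUID) removes the entry currently being
// iterated, and the if-check ensures that entry belongs to a pod other than the current one.
func pruneOtherPods(cpusToReuse map[string]string, currentPodUID string) {
	for podUID := range cpusToReuse {
		if currentPodUID != podUID {
			delete(cpusToReuse, podUID) // removes the other pod's entry, keeps the current one
		}
	}
}

func main() {
	cpusToReuse := map[string]string{"pod0": "0-1", "pod1": "2-3"}
	pruneOtherPods(cpusToReuse, "pod1")
	fmt.Println(cpusToReuse) // map[pod1:2-3]: only the current pod's entry remains
}
```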

Member Author

You are right, but the current code always deletes the current podUID inside the for loop. For example, consider admitting a pod with 2 init containers and 2 app containers. While calculating memory for those 4 containers, every call to updatePodReusableMemory(pod *v1.Pod, container *v1.Container, memoryBlocks []state.Block) clears p.initContainersReusableMemory[podUID] for the pod currently being admitted whenever another pod's entry exists, which is not correct.
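To make the failure mode concrete, here is a minimal, self-contained Go sketch (simplified hypothetical types and names, not the actual kubelet code) contrasting the old deletion key with the fixed one:

```go
package main

import "fmt"

// buggyUpdate mimics the old code: inside the loop it deletes by the outer podUID,
// so as soon as any other pod's entry exists, the current pod's own entry is removed.
func buggyUpdate(reusable map[string]uint64, podUID string) {
	for uid := range reusable {
		if podUID != uid {
			delete(reusable, podUID) // wrong key: wipes the current pod's reusable memory
		}
	}
}

// fixedUpdate mimics the merged change: delete by the loop key, i.e. the other pod's stale entry.
func fixedUpdate(reusable map[string]uint64, podUID string) {
	for uid := range reusable {
		if podUID != uid {
			delete(reusable, uid) // correct key: prune only the other pod's entry
		}
	}
}

func main() {
	// pod1 is being admitted and has reusable init-container memory recorded;
	// pod0 left a stale entry behind from an earlier admission.
	buggy := map[string]uint64{"pod0": 1 << 30, "pod1": 7 << 30}
	buggyUpdate(buggy, "pod1")
	fmt.Println(buggy) // map[pod0:1073741824]: pod1's reusable memory was lost

	fixed := map[string]uint64{"pod0": 1 << 30, "pod1": 7 << 30}
	fixedUpdate(fixed, "pod1")
	fmt.Println(fixed) // map[pod1:7516192768]: stale pod0 entry pruned, pod1 preserved
}
```

The new test case below exercises exactly this situation by pre-seeding initContainersReusableMemory with a pod0 entry before admitting pod1.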

Member Author

> As you can see, it uses a different variable name inside the loop:
>
> for podUID := range p.cpusToReuse {
>
> So the behavior here is OK; we want to delete a pod from the reusable pods once we are working on a pod with a different UID.


	// If pod entries to m.initContainersReusableMemory other than the current pod exist, delete them.
	for uid := range p.initContainersReusableMemory {
		if podUID != uid {
			delete(p.initContainersReusableMemory, podUID) // BUG: while working on a new pod with a different UID, this deletes the current pod's entry; it should delete the other pod's entry (uid), not the current one
		}
	}

ok I see now

}
}

215 changes: 196 additions & 19 deletions pkg/kubelet/cm/memorymanager/policy_static_test.go
@@ -117,20 +117,21 @@ func areContainerMemoryAssignmentsEqual(t *testing.T, cma1, cma2 state.Container
}

type testStaticPolicy struct {
description string
assignments state.ContainerMemoryAssignments
expectedAssignments state.ContainerMemoryAssignments
machineState state.NUMANodeMap
expectedMachineState state.NUMANodeMap
systemReserved systemReservedMemory
expectedError error
machineInfo *cadvisorapi.MachineInfo
pod *v1.Pod
topologyHint *topologymanager.TopologyHint
expectedTopologyHints map[string][]topologymanager.TopologyHint
description string
assignments state.ContainerMemoryAssignments
expectedAssignments state.ContainerMemoryAssignments
machineState state.NUMANodeMap
expectedMachineState state.NUMANodeMap
systemReserved systemReservedMemory
expectedError error
machineInfo *cadvisorapi.MachineInfo
pod *v1.Pod
topologyHint *topologymanager.TopologyHint
expectedTopologyHints map[string][]topologymanager.TopologyHint
initContainersReusableMemory reusableMemory
}

func initTests(t *testing.T, testCase *testStaticPolicy, hint *topologymanager.TopologyHint) (Policy, state.State, error) {
func initTests(t *testing.T, testCase *testStaticPolicy, hint *topologymanager.TopologyHint, initContainersReusableMemory reusableMemory) (Policy, state.State, error) {
manager := topologymanager.NewFakeManager()
if hint != nil {
manager = topologymanager.NewFakeManagerWithHint(hint)
@@ -140,6 +141,9 @@ func initTests(t *testing.T, testCase *testStaticPolicy, hint *topologymanager.T
if err != nil {
return nil, nil, err
}
if initContainersReusableMemory != nil {
p.(*staticPolicy).initContainersReusableMemory = initContainersReusableMemory
}
s := state.NewMemoryState()
s.SetMachineState(testCase.machineState)
s.SetMemoryAssignments(testCase.assignments)
@@ -173,7 +177,7 @@ func TestStaticPolicyNew(t *testing.T) {

for _, testCase := range testCases {
t.Run(testCase.description, func(t *testing.T) {
_, _, err := initTests(t, &testCase, nil)
_, _, err := initTests(t, &testCase, nil, nil)
if !reflect.DeepEqual(err, testCase.expectedError) {
t.Fatalf("The actual error: %v is different from the expected one: %v", err, testCase.expectedError)
}
@@ -194,7 +198,7 @@ func TestStaticPolicyName(t *testing.T) {
}
for _, testCase := range testCases {
t.Run(testCase.description, func(t *testing.T) {
p, _, err := initTests(t, &testCase, nil)
p, _, err := initTests(t, &testCase, nil, nil)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
@@ -1022,7 +1026,7 @@ func TestStaticPolicyStart(t *testing.T) {
for _, testCase := range testCases {
t.Run(testCase.description, func(t *testing.T) {
t.Logf("[Start] %s", testCase.description)
p, s, err := initTests(t, &testCase, nil)
p, s, err := initTests(t, &testCase, nil, nil)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
@@ -1763,7 +1767,7 @@ func TestStaticPolicyAllocate(t *testing.T) {
for _, testCase := range testCases {
t.Run(testCase.description, func(t *testing.T) {
t.Logf("TestStaticPolicyAllocate %s", testCase.description)
p, s, err := initTests(t, &testCase, testCase.topologyHint)
p, s, err := initTests(t, &testCase, testCase.topologyHint, nil)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
@@ -2308,12 +2312,185 @@ func TestStaticPolicyAllocateWithInitContainers(t *testing.T) {
),
topologyHint: &topologymanager.TopologyHint{},
},
{
description: "should re-use init containers memory, init containers requests 7Gi and 4Gi, apps containers 5Gi and 2Gi",
assignments: state.ContainerMemoryAssignments{},
initContainersReusableMemory: reusableMemory{"pod0": map[string]map[v1.ResourceName]uint64{}},
expectedAssignments: state.ContainerMemoryAssignments{
"pod1": map[string][]state.Block{
"initContainer1": {
{
NUMAAffinity: []int{0},
Type: v1.ResourceMemory,
Size: 0,
},
{
NUMAAffinity: []int{0},
Type: hugepages1Gi,
Size: 0,
},
},
"initContainer2": {
{
NUMAAffinity: []int{0},
Type: v1.ResourceMemory,
Size: 0,
},
{
NUMAAffinity: []int{0},
Type: hugepages1Gi,
Size: 0,
},
},
"container1": {
{
NUMAAffinity: []int{0},
Type: v1.ResourceMemory,
Size: 5 * gb,
},
{
NUMAAffinity: []int{0},
Type: hugepages1Gi,
Size: 5 * gb,
},
},
"container2": {
{
NUMAAffinity: []int{0},
Type: v1.ResourceMemory,
Size: 2 * gb,
},
{
NUMAAffinity: []int{0},
Type: hugepages1Gi,
Size: 2 * gb,
},
},
},
},
machineState: state.NUMANodeMap{
0: &state.NUMANodeState{
MemoryMap: map[v1.ResourceName]*state.MemoryTable{
v1.ResourceMemory: {
Allocatable: 10240 * mb,
Free: 10240 * mb,
Reserved: 0,
SystemReserved: 512 * mb,
TotalMemSize: 10 * gb,
},
hugepages1Gi: {
Allocatable: 10 * gb,
Free: 10 * gb,
Reserved: 0,
SystemReserved: 0,
TotalMemSize: 10 * gb,
},
},
Cells: []int{0},
},
},
expectedMachineState: state.NUMANodeMap{
0: &state.NUMANodeState{
MemoryMap: map[v1.ResourceName]*state.MemoryTable{
v1.ResourceMemory: {
Allocatable: 10240 * mb,
Free: 3072 * mb,
Reserved: 7 * gb,
SystemReserved: 512 * mb,
TotalMemSize: 10 * gb,
},
hugepages1Gi: {
Allocatable: 10 * gb,
Free: 3 * gb,
Reserved: 7 * gb,
SystemReserved: 0,
TotalMemSize: 10 * gb,
},
},
Cells: []int{0},
NumberOfAssignments: 8,
},
},
systemReserved: systemReservedMemory{
0: map[v1.ResourceName]uint64{
v1.ResourceMemory: 512 * mb,
},
},
pod: getPodWithInitContainers(
"pod1",
[]v1.Container{
{
Name: "container1",
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000Mi"),
v1.ResourceMemory: resource.MustParse("5Gi"),
hugepages1Gi: resource.MustParse("5Gi"),
},
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000Mi"),
v1.ResourceMemory: resource.MustParse("5Gi"),
hugepages1Gi: resource.MustParse("5Gi"),
},
},
},
{
Name: "container2",
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000Mi"),
v1.ResourceMemory: resource.MustParse("2Gi"),
hugepages1Gi: resource.MustParse("2Gi"),
},
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000Mi"),
v1.ResourceMemory: resource.MustParse("2Gi"),
hugepages1Gi: resource.MustParse("2Gi"),
},
},
},
},
[]v1.Container{
{
Name: "initContainer1",
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000Mi"),
v1.ResourceMemory: resource.MustParse("7Gi"),
hugepages1Gi: resource.MustParse("7Gi"),
},
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000Mi"),
v1.ResourceMemory: resource.MustParse("7Gi"),
hugepages1Gi: resource.MustParse("7Gi"),
},
},
},
{
Name: "initContainer2",
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000Mi"),
v1.ResourceMemory: resource.MustParse("4Gi"),
hugepages1Gi: resource.MustParse("4Gi"),
},
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000Mi"),
v1.ResourceMemory: resource.MustParse("4Gi"),
hugepages1Gi: resource.MustParse("4Gi"),
},
},
},
},
),
topologyHint: &topologymanager.TopologyHint{},
},
}

for _, testCase := range testCases {
t.Run(testCase.description, func(t *testing.T) {
klog.InfoS("TestStaticPolicyAllocateWithInitContainers", "test name", testCase.description)
p, s, err := initTests(t, &testCase, testCase.topologyHint)
p, s, err := initTests(t, &testCase, testCase.topologyHint, testCase.initContainersReusableMemory)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
@@ -2583,7 +2760,7 @@ func TestStaticPolicyRemoveContainer(t *testing.T) {

for _, testCase := range testCases {
t.Run(testCase.description, func(t *testing.T) {
p, s, err := initTests(t, &testCase, nil)
p, s, err := initTests(t, &testCase, nil, nil)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
@@ -2876,7 +3053,7 @@ func TestStaticPolicyGetTopologyHints(t *testing.T) {

for _, testCase := range testCases {
t.Run(testCase.description, func(t *testing.T) {
p, s, err := initTests(t, &testCase, nil)
p, s, err := initTests(t, &testCase, nil, nil)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}