Skip to content

Commit

Permalink
UPSTREAM: 116376: node: device-mgr: Handle recovery by checking if he…
Browse files Browse the repository at this point in the history
…althy devices exist

In case of node reboot/kubelet restart, the flow of events involves
obtaining the state from the checkpoint file followed by setting
the `healthDevices`/`unhealthyDevices` to its zero value. This is
done to allow the device plugin to re-register itself so that
capacity can be updated appropriately.

During the allocation phase, we need to check if the resources requested
by the pod have been registered AND healthy devices are present on
the node to be allocated.

Also we need to move this check above `needed==0` where needed is
required - devices allocated to the container (which is obtained from
the checkpoint file) because even in cases where no additional devices
have to be allocated (as they were pre-allocated), we still need to
make sure he devices that were previously allocated are healthy.

Signed-off-by: Swati Sehgal <swsehgal@redhat.com>
  • Loading branch information
swatisehgal committed May 5, 2023
1 parent 16bcd69 commit 613c8d1
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 5 deletions.
24 changes: 19 additions & 5 deletions pkg/kubelet/cm/devicemanager/manager.go
Expand Up @@ -700,15 +700,29 @@ func (m *ManagerImpl) devicesToAllocate(podUID, contName, resource string, requi
return nil, fmt.Errorf("pod %q container %q changed request for resource %q from %d to %d", string(podUID), contName, resource, devices.Len(), required)
}
}

klog.V(3).InfoS("Need devices to allocate for pod", "deviceNumber", needed, "resourceName", resource, "podUID", string(podUID), "containerName", contName)
healthyDevices, hasRegistered := m.healthyDevices[resource]

// Check if resource registered with devicemanager
if !hasRegistered {
return nil, fmt.Errorf("cannot allocate unregistered device %s", resource)
}

// Check if registered resource has healthy devices
if healthyDevices.Len() == 0 {
return nil, fmt.Errorf("no healthy devices present; cannot allocate unhealthy devices %s", resource)
}

// Check if all the previously allocated devices are healthy
if !healthyDevices.IsSuperset(devices) {
return nil, fmt.Errorf("previously allocated devices are no longer healthy; cannot allocate unhealthy devices %s", resource)
}

if needed == 0 {
// No change, no work.
return nil, nil
}
klog.V(3).InfoS("Need devices to allocate for pod", "deviceNumber", needed, "resourceName", resource, "podUID", string(podUID), "containerName", contName)
// Check if resource registered with devicemanager
if _, ok := m.healthyDevices[resource]; !ok {
return nil, fmt.Errorf("can't allocate unregistered device %s", resource)
}

// Declare the list of allocated devices.
// This will be populated and returned below.
Expand Down
96 changes: 96 additions & 0 deletions pkg/kubelet/cm/devicemanager/manager_test.go
Expand Up @@ -959,6 +959,102 @@ func TestPodContainerDeviceAllocation(t *testing.T) {

}

func TestPodContainerDeviceToAllocate(t *testing.T) {
resourceName1 := "domain1.com/resource1"
resourceName2 := "domain2.com/resource2"
resourceName3 := "domain2.com/resource3"
as := require.New(t)
tmpDir, err := os.MkdirTemp("", "checkpoint")
as.Nil(err)
defer os.RemoveAll(tmpDir)

testManager := &ManagerImpl{
endpoints: make(map[string]endpointInfo),
healthyDevices: make(map[string]sets.String),
unhealthyDevices: make(map[string]sets.String),
allocatedDevices: make(map[string]sets.String),
podDevices: newPodDevices(),
}

testManager.podDevices.insert("pod1", "con1", resourceName1,
constructDevices([]string{"dev1", "dev2"}),
constructAllocResp(map[string]string{"/dev/r2dev1": "/dev/r2dev1", "/dev/r2dev2": "/dev/r2dev2"},
map[string]string{"/home/r2lib1": "/usr/r2lib1"},
map[string]string{"r2devices": "dev1 dev2"}))
testManager.podDevices.insert("pod2", "con2", resourceName2,
checkpoint.DevicesPerNUMA{nodeWithoutTopology: []string{"dev5"}},
constructAllocResp(map[string]string{"/dev/r1dev5": "/dev/r1dev5"},
map[string]string{"/home/r1lib1": "/usr/r1lib1"}, map[string]string{}))
testManager.podDevices.insert("pod3", "con3", resourceName3,
checkpoint.DevicesPerNUMA{nodeWithoutTopology: []string{"dev5"}},
constructAllocResp(map[string]string{"/dev/r1dev5": "/dev/r1dev5"},
map[string]string{"/home/r1lib1": "/usr/r1lib1"}, map[string]string{}))

// no healthy devices for resourceName1 and devices corresponding to
// resource2 are intentionally omitted to simulate that the resource
// hasn't been registered.
testManager.healthyDevices[resourceName1] = sets.NewString()
testManager.healthyDevices[resourceName3] = sets.NewString()
// dev5 is no longer in the list of healthy devices
testManager.healthyDevices[resourceName3].Insert("dev7")
testManager.healthyDevices[resourceName3].Insert("dev8")

testCases := []struct {
description string
podUID string
contName string
resource string
required int
reusableDevices sets.String
expectedAllocatedDevices sets.String
expErr error
}{
{
description: "Admission error in case no healthy devices to allocate present",
podUID: "pod1",
contName: "con1",
resource: resourceName1,
required: 2,
reusableDevices: sets.NewString(),
expectedAllocatedDevices: nil,
expErr: fmt.Errorf("no healthy devices present; cannot allocate unhealthy devices %s", resourceName1),
},
{
description: "Admission error in case resource is not registered",
podUID: "pod2",
contName: "con2",
resource: resourceName2,
required: 1,
reusableDevices: sets.NewString(),
expectedAllocatedDevices: nil,
expErr: fmt.Errorf("cannot allocate unregistered device %s", resourceName2),
},
{
description: "Admission error in case resource not devices previously allocated no longer healthy",
podUID: "pod3",
contName: "con3",
resource: resourceName3,
required: 1,
reusableDevices: sets.NewString(),
expectedAllocatedDevices: nil,
expErr: fmt.Errorf("previously allocated devices are no longer healthy; cannot allocate unhealthy devices %s", resourceName3),
},
}

for _, testCase := range testCases {
allocDevices, err := testManager.devicesToAllocate(testCase.podUID, testCase.contName, testCase.resource, testCase.required, testCase.reusableDevices)
if !reflect.DeepEqual(err, testCase.expErr) {
t.Errorf("devicePluginManager error (%v). expected error: %v but got: %v",
testCase.description, testCase.expErr, err)
}
if !reflect.DeepEqual(allocDevices, testCase.expectedAllocatedDevices) {
t.Errorf("devicePluginManager error (%v). expected error: %v but got: %v",
testCase.description, testCase.expectedAllocatedDevices, allocDevices)
}
}

}

func TestInitContainerDeviceAllocation(t *testing.T) {
// Requesting to create a pod that requests resourceName1 in init containers and normal containers
// should succeed with devices allocated to init containers reallocated to normal containers.
Expand Down

0 comments on commit 613c8d1

Please sign in to comment.