Skip to content

Commit

Permalink
node: devicemgr: topomgr: add logs
Browse files Browse the repository at this point in the history
One of the factors that makes issues #118559 and #109595 hard to
debug and fix is that the devicemanager has very few logs in important
flows, so it's unnecessarily hard to reconstruct the state from logs.

We add minimal logs to be able to improve troubleshooting.
We add minimal logs to be backport-friendly, deferring a more
comprehensive review of logging to later PRs.

Signed-off-by: Francesco Romani <fromani@redhat.com>
  • Loading branch information
ffromani committed Aug 8, 2023
1 parent b6aaf8c commit 180aa30
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 1 deletion.
4 changes: 4 additions & 0 deletions pkg/kubelet/cm/devicemanager/manager.go
Expand Up @@ -226,6 +226,7 @@ func (m *ManagerImpl) PluginConnected(resourceName string, p plugin.DevicePlugin
defer m.mutex.Unlock()
m.endpoints[resourceName] = endpointInfo{e, options}

klog.V(2).InfoS("Device plugin connected", "resourceName", resourceName)
return nil
}

Expand Down Expand Up @@ -256,6 +257,7 @@ func (m *ManagerImpl) PluginListAndWatchReceiver(resourceName string, resp *plug
}

func (m *ManagerImpl) genericDeviceUpdateCallback(resourceName string, devices []pluginapi.Device) {
healthyCount := 0
m.mutex.Lock()
m.healthyDevices[resourceName] = sets.NewString()
m.unhealthyDevices[resourceName] = sets.NewString()
Expand All @@ -264,6 +266,7 @@ func (m *ManagerImpl) genericDeviceUpdateCallback(resourceName string, devices [
m.allDevices[resourceName][dev.ID] = dev
if dev.Health == pluginapi.Healthy {
m.healthyDevices[resourceName].Insert(dev.ID)
healthyCount++
} else {
m.unhealthyDevices[resourceName].Insert(dev.ID)
}
Expand All @@ -272,6 +275,7 @@ func (m *ManagerImpl) genericDeviceUpdateCallback(resourceName string, devices [
if err := m.writeCheckpoint(); err != nil {
klog.ErrorS(err, "Writing checkpoint encountered")
}
klog.V(2).InfoS("Processed device updates for resource", "resourceName", resourceName, "totalCount", len(devices), "healthyCount", healthyCount)
}

// GetWatcherHandler returns the plugin handler
Expand Down
2 changes: 1 addition & 1 deletion pkg/kubelet/cm/topologymanager/topology_manager.go
Expand Up @@ -207,7 +207,7 @@ func (m *manager) RemoveContainer(containerID string) error {
}

func (m *manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
klog.InfoS("Topology Admit Handler")
klog.InfoS("Topology Admit Handler", "podUID", attrs.Pod.UID, "podNamespace", attrs.Pod.Namespace, "podName", attrs.Pod.Name)
pod := attrs.Pod

return m.scope.Admit(pod)
Expand Down

0 comments on commit 180aa30

Please sign in to comment.