Skip to content

Commit

Permalink
kubelet: add operations count and error count metrics to network plug…
Browse files Browse the repository at this point in the history
…in manager
  • Loading branch information
AnishShah committed Jul 15, 2020
1 parent 6079ceb commit 0ffe89e
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 4 deletions.
28 changes: 27 additions & 1 deletion pkg/kubelet/dockershim/network/metrics/metrics.go
Expand Up @@ -28,9 +28,11 @@ import (

const (
// NetworkPluginOperationsKey is the key for operation count metrics.
NetworkPluginOperationsKey = "network_plugin_operations"
NetworkPluginOperationsKey = "network_plugin_operations_total"
// NetworkPluginOperationsLatencyKey is the key for the operation latency metrics.
NetworkPluginOperationsLatencyKey = "network_plugin_operations_duration_seconds"
// NetworkPluginOperationsErrorsKey is the key for the operations error metrics.
NetworkPluginOperationsErrorsKey = "network_plugin_operations_errors_total"

// Keep the "kubelet" subsystem for backward compatibility.
kubeletSubsystem = "kubelet"
Expand All @@ -49,6 +51,28 @@ var (
},
[]string{"operation_type"},
)

// NetworkPluginOperations is a counter vector holding the cumulative number
// of network plugin operations, partitioned by the "operation_type" label.
// It is declared in the kubelet subsystem with NetworkPluginOperationsKey as
// its metric name and is marked as ALPHA stability.
NetworkPluginOperations = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: kubeletSubsystem,
Name: NetworkPluginOperationsKey,
Help: "Cumulative number of network plugin operations by operation type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"operation_type"},
)

// NetworkPluginOperationsErrors is a counter vector holding the cumulative
// number of network plugin operation errors, partitioned by the
// "operation_type" label. It is declared in the kubelet subsystem with
// NetworkPluginOperationsErrorsKey as its metric name and is marked as ALPHA
// stability.
NetworkPluginOperationsErrors = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: kubeletSubsystem,
Name: NetworkPluginOperationsErrorsKey,
Help: "Cumulative number of network plugin operation errors by operation type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"operation_type"},
)
)

var registerMetrics sync.Once
Expand All @@ -57,6 +81,8 @@ var registerMetrics sync.Once
func Register() {
registerMetrics.Do(func() {
legacyregistry.MustRegister(NetworkPluginOperationsLatency)
legacyregistry.MustRegister(NetworkPluginOperations)
legacyregistry.MustRegister(NetworkPluginOperationsErrors)
})
}

Expand Down
18 changes: 15 additions & 3 deletions pkg/kubelet/dockershim/network/plugins.go
Expand Up @@ -382,45 +382,57 @@ func (pm *PluginManager) podUnlock(fullPodName string) {

// recordOperation bumps the operation counter for the given operation type
// and then observes the time elapsed since start, in seconds, on the latency
// metric for the same operation type.
func recordOperation(operation string, start time.Time) {
	opCounter := metrics.NetworkPluginOperations.WithLabelValues(operation)
	opCounter.Inc()

	// Measure after the counter increment, as the original single-expression
	// form did, so the observed duration is unchanged.
	elapsedSeconds := metrics.SinceInSeconds(start)
	latency := metrics.NetworkPluginOperationsLatency.WithLabelValues(operation)
	latency.Observe(elapsedSeconds)
}

// recordError increments the error counter for the given operation type.
func recordError(operation string) {
	errCounter := metrics.NetworkPluginOperationsErrors.WithLabelValues(operation)
	errCounter.Inc()
}

// GetPodNetworkStatus asks the underlying network plugin for the network
// status of the given pod while holding that pod's lock.
//
// The operation count and latency are recorded exactly once per call through
// a single deferred recordOperation. Because time.Now() is evaluated when the
// defer is registered, the observed duration includes any time spent waiting
// for the pod lock. If the plugin reports an error, the error counter for
// this operation is incremented and a nil status is returned together with
// an error naming the plugin and the pod.
func (pm *PluginManager) GetPodNetworkStatus(podNamespace, podName string, id kubecontainer.ContainerID) (*PodNetworkStatus, error) {
	const operation = "get_pod_network_status"
	// Only one deferred recordOperation: a second one would double-count the
	// operation and observe its latency twice.
	defer recordOperation(operation, time.Now())

	fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
	pm.podLock(fullPodName).Lock()
	defer pm.podUnlock(fullPodName)

	netStatus, err := pm.plugin.GetPodNetworkStatus(podNamespace, podName, id)
	if err != nil {
		recordError(operation)
		return nil, fmt.Errorf("networkPlugin %s failed on the status hook for pod %q: %v", pm.plugin.Name(), fullPodName, err)
	}

	return netStatus, nil
}

// SetUpPod calls the underlying network plugin to set up networking for the
// given pod while holding that pod's lock.
//
// The operation count and latency are recorded exactly once per call through
// a single deferred recordOperation. Because time.Now() is evaluated when the
// defer is registered, the observed duration includes any time spent waiting
// for the pod lock. If the plugin reports an error, the error counter for
// this operation is incremented and an error naming the plugin and the pod is
// returned.
func (pm *PluginManager) SetUpPod(podNamespace, podName string, id kubecontainer.ContainerID, annotations, options map[string]string) error {
	const operation = "set_up_pod"
	// Only one deferred recordOperation: a second one would double-count the
	// operation and observe its latency twice.
	defer recordOperation(operation, time.Now())

	fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
	pm.podLock(fullPodName).Lock()
	defer pm.podUnlock(fullPodName)

	klog.V(3).Infof("Calling network plugin %s to set up pod %q", pm.plugin.Name(), fullPodName)
	if err := pm.plugin.SetUpPod(podNamespace, podName, id, annotations, options); err != nil {
		recordError(operation)
		return fmt.Errorf("networkPlugin %s failed to set up pod %q network: %v", pm.plugin.Name(), fullPodName, err)
	}

	return nil
}

func (pm *PluginManager) TearDownPod(podNamespace, podName string, id kubecontainer.ContainerID) error {
defer recordOperation("tear_down_pod", time.Now())
const operation = "tear_down_pod"
defer recordOperation(operation, time.Now())
fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
pm.podLock(fullPodName).Lock()
defer pm.podUnlock(fullPodName)

klog.V(3).Infof("Calling network plugin %s to tear down pod %q", pm.plugin.Name(), fullPodName)
if err := pm.plugin.TearDownPod(podNamespace, podName, id); err != nil {
recordError(operation)
return fmt.Errorf("networkPlugin %s failed to teardown pod %q network: %v", pm.plugin.Name(), fullPodName, err)
}

Expand Down

0 comments on commit 0ffe89e

Please sign in to comment.