Skip to content

Commit

Permalink
Add iptables restore failure metrics
Browse files Browse the repository at this point in the history
As mentioned in issue #80061, under iptables lock contention
we can see an increasing rate of iptables-restore failures, because
iptables-restore needs to grab the iptables file lock.

The failure metric gives administrators more insight into such failures.

The metric is collected in both the iptables and ipvs modes of kube-proxy.

Signed-off-by: Hui Luo <luoh@vmware.com>
  • Loading branch information
figo committed Aug 9, 2019
1 parent eadf68e commit a2ef00c
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 0 deletions.
3 changes: 3 additions & 0 deletions pkg/proxy/iptables/proxier.go
Expand Up @@ -429,6 +429,7 @@ func CleanupLeftovers(ipt utiliptables.Interface) (encounteredError bool) {
err = ipt.Restore(utiliptables.TableNAT, natLines, utiliptables.NoFlushTables, utiliptables.RestoreCounters)
if err != nil {
klog.Errorf("Failed to execute iptables-restore for %s: %v", utiliptables.TableNAT, err)
metrics.IptablesRestoreFailuresTotal.Inc()
encounteredError = true
}
}
Expand All @@ -455,6 +456,7 @@ func CleanupLeftovers(ipt utiliptables.Interface) (encounteredError bool) {
// Write it.
if err := ipt.Restore(utiliptables.TableFilter, filterLines, utiliptables.NoFlushTables, utiliptables.RestoreCounters); err != nil {
klog.Errorf("Failed to execute iptables-restore for %s: %v", utiliptables.TableFilter, err)
metrics.IptablesRestoreFailuresTotal.Inc()
encounteredError = true
}
}
Expand Down Expand Up @@ -1401,6 +1403,7 @@ func (proxier *Proxier) syncProxyRules() {
err = proxier.iptables.RestoreAll(proxier.iptablesData.Bytes(), utiliptables.NoFlushTables, utiliptables.RestoreCounters)
if err != nil {
klog.Errorf("Failed to execute iptables-restore: %v", err)
metrics.IptablesRestoreFailuresTotal.Inc()
// Revert new local ports.
klog.V(2).Infof("Closing local ports after iptables-restore failure")
utilproxy.RevertPorts(replacementPortsMap, proxier.portsMap)
Expand Down
1 change: 1 addition & 0 deletions pkg/proxy/ipvs/proxier.go
Expand Up @@ -1310,6 +1310,7 @@ func (proxier *Proxier) syncProxyRules() {
err = proxier.iptables.RestoreAll(proxier.iptablesData.Bytes(), utiliptables.NoFlushTables, utiliptables.RestoreCounters)
if err != nil {
klog.Errorf("Failed to execute iptables-restore: %v\nRules:\n%s", err, proxier.iptablesData.Bytes())
metrics.IptablesRestoreFailuresTotal.Inc()
// Revert new local ports.
utilproxy.RevertPorts(replacementPortsMap, proxier.portsMap)
return
Expand Down
11 changes: 11 additions & 0 deletions pkg/proxy/metrics/metrics.go
Expand Up @@ -116,6 +116,16 @@ var (
Help: "Cumulative proxy rules Service changes",
},
)

// IptablesRestoreFailuresTotal is the cumulative number of iptables restore
// failures that the proxy has seen. It is incremented each time a
// Restore/RestoreAll call returns an error, both in CleanupLeftovers and in
// syncProxyRules of the iptables proxier, and in syncProxyRules of the ipvs
// proxier.
IptablesRestoreFailuresTotal = prometheus.NewCounter(
prometheus.CounterOpts{
Subsystem: kubeProxySubsystem,
Name: "sync_proxy_rules_iptables_restore_failures_total",
Help: "Cumulative proxy iptables restore failures",
},
)
)

var registerMetricsOnce sync.Once
Expand All @@ -131,6 +141,7 @@ func RegisterMetrics() {
prometheus.MustRegister(EndpointChangesTotal)
prometheus.MustRegister(ServiceChangesPending)
prometheus.MustRegister(ServiceChangesTotal)
prometheus.MustRegister(IptablesRestoreFailuresTotal)
})
}

Expand Down

0 comments on commit a2ef00c

Please sign in to comment.