diff --git a/cmd/pinger/pinger.go b/cmd/pinger/pinger.go index a9e72c38ab6..bd4e0cdbf58 100644 --- a/cmd/pinger/pinger.go +++ b/cmd/pinger/pinger.go @@ -1,7 +1,6 @@ package pinger import ( - "fmt" "net/http" _ "net/http/pprof" // #nosec "time" @@ -18,43 +17,44 @@ func CmdMain() { defer klog.Flush() klog.Infof(versions.String()) - pinger.InitPingerMetrics() - util.InitKlogMetrics() config, err := pinger.ParseFlags() if err != nil { util.LogFatalAndExit(err, "failed to parse config") } - if config.Mode == "server" && config.EnableMetrics { - mux := http.NewServeMux() - mux.Handle("/metrics", promhttp.Handler()) - - go func() { - // conform to Gosec G114 - // https://github.com/securego/gosec#available-rules - server := &http.Server{ - Addr: fmt.Sprintf("0.0.0.0:%d", config.Port), - ReadHeaderTimeout: 3 * time.Second, - Handler: mux, - } - util.LogFatalAndExit(server.ListenAndServe(), "failed to listen and serve on %s", server.Addr) - }() + if config.Mode == "server" { + if config.EnableMetrics { + pinger.InitPingerMetrics() + util.InitKlogMetrics() + + mux := http.NewServeMux() + mux.Handle("/metrics", promhttp.Handler()) + go func() { + // conform to Gosec G114 + // https://github.com/securego/gosec#available-rules + server := &http.Server{ + Addr: util.JoinHostPort("0.0.0.0", config.Port), + ReadHeaderTimeout: 3 * time.Second, + Handler: mux, + } + util.LogFatalAndExit(server.ListenAndServe(), "failed to listen and serve on %s", server.Addr) + }() + } if config.EnableVerboseConnCheck { go func() { - addr := fmt.Sprintf("0.0.0.0:%d", config.TCPConnCheckPort) + addr := util.JoinHostPort("0.0.0.0", config.TCPConnCheckPort) if err := util.TCPConnectivityListen(addr); err != nil { util.LogFatalAndExit(err, "failed to start TCP listen on addr %s ", addr) } }() go func() { - addr := fmt.Sprintf("0.0.0.0:%d", config.UDPConnCheckPort) + addr := util.JoinHostPort("0.0.0.0", config.UDPConnCheckPort) if err := util.UDPConnectivityListen(addr); err != nil { util.LogFatalAndExit(err, "failed to start UDP listen on addr %s ", addr) } }() } } - e := pinger.NewExporter(config) - pinger.StartPinger(config, e) + pinger.StartPinger(config) } diff --git a/pkg/pinger/config.go b/pkg/pinger/config.go index f876f56e36e..61de6ba273e 100644 --- a/pkg/pinger/config.go +++ b/pkg/pinger/config.go @@ -19,7 +19,7 @@ import ( type Configuration struct { KubeConfigFile string KubeClient kubernetes.Interface - Port int + Port int32 DaemonSetNamespace string DaemonSetName string Interval int @@ -51,18 +51,18 @@ type Configuration struct { ServiceOvnControllerFileLogPath string ServiceOvnControllerFilePidPath string EnableVerboseConnCheck bool - TCPConnCheckPort int - UDPConnCheckPort int + TCPConnCheckPort int32 + UDPConnCheckPort int32 TargetIPPorts string } func ParseFlags() (*Configuration, error) { var ( - argPort = pflag.Int("port", 8080, "metrics port") + argPort = pflag.Int32("port", 8080, "metrics port") argEnableVerboseConnCheck = pflag.Bool("enable-verbose-conn-check", false, "enable TCP/UDP connectivity check") - argTCPConnectivityCheckPort = pflag.Int("tcp-conn-check-port", 8100, "TCP connectivity Check Port") - argUDPConnectivityCheckPort = pflag.Int("udp-conn-check-port", 8101, "UDP connectivity Check Port") + argTCPConnectivityCheckPort = pflag.Int32("tcp-conn-check-port", 8100, "TCP connectivity Check Port") + argUDPConnectivityCheckPort = pflag.Int32("udp-conn-check-port", 8101, "UDP connectivity Check Port") argKubeConfigFile = pflag.String("kubeconfig", "", "Path to kubeconfig file with authorization and master location information. If not set use the inCluster token.") argDaemonSetNameSpace = pflag.String("ds-namespace", "kube-system", "kube-ovn-pinger daemonset namespace") diff --git a/pkg/pinger/ovn.go b/pkg/pinger/ovn.go index 23255da627d..1396800cef3 100644 --- a/pkg/pinger/ovn.go +++ b/pkg/pinger/ovn.go @@ -11,7 +11,7 @@ import ( "github.com/kubeovn/kube-ovn/pkg/util" ) -func checkOvs(config *Configuration) error { +func checkOvs(config *Configuration, setMetrics bool) error { output, err := exec.Command("/usr/share/openvswitch/scripts/ovs-ctl", "status").CombinedOutput() if err != nil { klog.Errorf("check ovs status failed %v, %s", err, string(output)) @@ -19,11 +19,13 @@ func checkOvs(config *Configuration) error { return err } klog.Infof("ovs-vswitchd and ovsdb are up") - SetOvsUpMetrics(config.NodeName) + if setMetrics { + SetOvsUpMetrics(config.NodeName) + } return nil } -func checkOvnController(config *Configuration) error { +func checkOvnController(config *Configuration, setMetrics bool) error { output, err := exec.Command("/usr/share/ovn/scripts/ovn-ctl", "status_controller").CombinedOutput() if err != nil { klog.Errorf("check ovn_controller status failed %v, %q", err, output) @@ -31,11 +33,13 @@ func checkOvnController(config *Configuration) error { return err } klog.Infof("ovn_controller is up") - SetOvnControllerUpMetrics(config.NodeName) + if setMetrics { + SetOvnControllerUpMetrics(config.NodeName) + } return nil } -func checkPortBindings(config *Configuration) error { +func checkPortBindings(config *Configuration, setMetrics bool) error { klog.Infof("start to check port binding") ovsBindings, err := checkOvsBindings() if err != nil { @@ -62,7 +66,9 @@ func checkPortBindings(config *Configuration) error { } klog.Infof("ovs and ovn-sb binding check passed") - inconsistentPortBindingGauge.WithLabelValues(config.NodeName).Set(0) + if setMetrics { + inconsistentPortBindingGauge.WithLabelValues(config.NodeName).Set(0) + } return nil } diff --git a/pkg/pinger/ping.go b/pkg/pinger/ping.go index 944f305d518..7982dad87a2 100644 --- a/pkg/pinger/ping.go +++ b/pkg/pinger/ping.go @@ -19,23 +19,29 @@ import ( "github.com/kubeovn/kube-ovn/pkg/util" ) -func StartPinger(config *Configuration, e *Exporter) { +func StartPinger(config *Configuration) { errHappens := false + var exporter *Exporter + withMetrics := config.Mode == "server" && config.EnableMetrics for { if config.NetworkMode == "kube-ovn" { - if checkOvs(config) != nil { + if checkOvs(config, withMetrics) != nil { errHappens = true } - if checkOvnController(config) != nil { + if checkOvnController(config, withMetrics) != nil { errHappens = true } - if checkPortBindings(config) != nil { + if checkPortBindings(config, withMetrics) != nil { errHappens = true } - e.ovsMetricsUpdate() + if withMetrics { + if exporter == nil { + exporter = NewExporter(config) + } + exporter.ovsMetricsUpdate() + } } - - if ping(config) != nil { + if ping(config, withMetrics) != nil { errHappens = true } if config.Mode != "server" { @@ -48,23 +54,23 @@ func StartPinger(config *Configuration, e *Exporter) { } } -func ping(config *Configuration) error { +func ping(config *Configuration, withMetrics bool) error { errHappens := false - if checkAPIServer(config) != nil { + if checkAPIServer(config, withMetrics) != nil { errHappens = true } - if pingPods(config) != nil { + if pingPods(config, withMetrics) != nil { errHappens = true } - if pingNodes(config) != nil { + if pingNodes(config, withMetrics) != nil { errHappens = true } - if internalNslookup(config) != nil { + if internalNslookup(config, withMetrics) != nil { errHappens = true } if config.ExternalDNS != "" { - if externalNslookup(config) != nil { + if externalNslookup(config, withMetrics) != nil { errHappens = true } } @@ -76,7 +82,7 @@ func ping(config *Configuration) error { } if config.ExternalAddress != "" { - if pingExternal(config) != nil { + if pingExternal(config, withMetrics) != nil { errHappens = true } } @@ -86,7 +92,7 @@ func ping(config *Configuration) error { return nil } -func pingNodes(config *Configuration) error { +func pingNodes(config *Configuration, setMetrics bool) error { klog.Infof("start to check node connectivity") nodes, err := config.KubeClient.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{}) if err != nil { @@ -137,14 +143,16 @@ func pingNodes(config *Configuration) error { if int(math.Abs(float64(stats.PacketsSent-stats.PacketsRecv))) != 0 { pingErr = fmt.Errorf("ping failed") } - SetNodePingMetrics( - config.NodeName, - config.HostIP, - config.PodName, - no.Name, addr.Address, - float64(stats.AvgRtt)/float64(time.Millisecond), - int(math.Abs(float64(stats.PacketsSent-stats.PacketsRecv))), - int(float64(stats.PacketsSent))) + if setMetrics { + SetNodePingMetrics( + config.NodeName, + config.HostIP, + config.PodName, + no.Name, addr.Address, + float64(stats.AvgRtt)/float64(time.Millisecond), + int(math.Abs(float64(stats.PacketsSent-stats.PacketsRecv))), + int(float64(stats.PacketsSent))) + } }(addr.Address, no.Name) } } @@ -152,7 +160,7 @@ func pingNodes(config *Configuration) error { return pingErr } -func pingPods(config *Configuration) error { +func pingPods(config *Configuration, setMetrics bool) error { klog.Infof("start to check pod connectivity") ds, err := config.KubeClient.AppsV1().DaemonSets(config.DaemonSetNamespace).Get(context.Background(), config.DaemonSetName, metav1.GetOptions{}) if err != nil { @@ -209,16 +217,18 @@ func pingPods(config *Configuration) error { if int(math.Abs(float64(stats.PacketsSent-stats.PacketsRecv))) != 0 { pingErr = fmt.Errorf("ping failed") } - SetPodPingMetrics( - config.NodeName, - config.HostIP, - config.PodName, - nodeName, - nodeIP, - podIP, - float64(stats.AvgRtt)/float64(time.Millisecond), - int(math.Abs(float64(stats.PacketsSent-stats.PacketsRecv))), - int(float64(stats.PacketsSent))) + if setMetrics { + SetPodPingMetrics( + config.NodeName, + config.HostIP, + config.PodName, + nodeName, + nodeIP, + podIP, + float64(stats.AvgRtt)/float64(time.Millisecond), + int(math.Abs(float64(stats.PacketsSent-stats.PacketsRecv))), + int(float64(stats.PacketsSent))) + } }(podIP.IP, pod.Name, pod.Status.HostIP, pod.Spec.NodeName) } } @@ -226,7 +236,7 @@ func pingPods(config *Configuration) error { return pingErr } -func pingExternal(config *Configuration) error { +func pingExternal(config *Configuration, setMetrics bool) error { if config.ExternalAddress == "" { return nil } @@ -255,13 +265,15 @@ func pingExternal(config *Configuration) error { stats := pinger.Statistics() klog.Infof("ping external address: %s, total count: %d, loss count %d, average rtt %.2fms", addr, pinger.Count, int(math.Abs(float64(stats.PacketsSent-stats.PacketsRecv))), float64(stats.AvgRtt)/float64(time.Millisecond)) - SetExternalPingMetrics( - config.NodeName, - config.HostIP, - config.PodIP, - addr, - float64(stats.AvgRtt)/float64(time.Millisecond), - int(math.Abs(float64(stats.PacketsSent-stats.PacketsRecv)))) + if setMetrics { + SetExternalPingMetrics( + config.NodeName, + config.HostIP, + config.PodIP, + addr, + float64(stats.AvgRtt)/float64(time.Millisecond), + int(math.Abs(float64(stats.PacketsSent-stats.PacketsRecv)))) + } if int(math.Abs(float64(stats.PacketsSent-stats.PacketsRecv))) != 0 { return fmt.Errorf("ping failed") } @@ -318,7 +330,7 @@ func checkAccessTargetIPPorts(config *Configuration) error { return checkErr } -func internalNslookup(config *Configuration) error { +func internalNslookup(config *Configuration, setMetrics bool) error { klog.Infof("start to check dns connectivity") t1 := time.Now() ctx, cancel := context.WithTimeout(context.TODO(), 10*time.Second) @@ -328,15 +340,19 @@ func internalNslookup(config *Configuration) error { elapsed := time.Since(t1) if err != nil { klog.Errorf("failed to resolve dns %s, %v", config.InternalDNS, err) - SetInternalDNSUnhealthyMetrics(config.NodeName) + if setMetrics { + SetInternalDNSUnhealthyMetrics(config.NodeName) + } return err } - SetInternalDNSHealthyMetrics(config.NodeName, float64(elapsed)/float64(time.Millisecond)) + if setMetrics { + SetInternalDNSHealthyMetrics(config.NodeName, float64(elapsed)/float64(time.Millisecond)) + } klog.Infof("resolve dns %s to %v in %.2fms", config.InternalDNS, addrs, float64(elapsed)/float64(time.Millisecond)) return nil } -func externalNslookup(config *Configuration) error { +func externalNslookup(config *Configuration, setMetrics bool) error { klog.Infof("start to check dns connectivity") t1 := time.Now() ctx, cancel := context.WithTimeout(context.TODO(), 10*time.Second) @@ -346,25 +362,33 @@ func externalNslookup(config *Configuration) error { elapsed := time.Since(t1) if err != nil { klog.Errorf("failed to resolve dns %s, %v", config.ExternalDNS, err) - SetExternalDNSUnhealthyMetrics(config.NodeName) + if setMetrics { + SetExternalDNSUnhealthyMetrics(config.NodeName) + } return err } - SetExternalDNSHealthyMetrics(config.NodeName, float64(elapsed)/float64(time.Millisecond)) + if setMetrics { + SetExternalDNSHealthyMetrics(config.NodeName, float64(elapsed)/float64(time.Millisecond)) + } klog.Infof("resolve dns %s to %v in %.2fms", config.ExternalDNS, addrs, float64(elapsed)/float64(time.Millisecond)) return nil } -func checkAPIServer(config *Configuration) error { +func checkAPIServer(config *Configuration, setMetrics bool) error { klog.Infof("start to check apiserver connectivity") t1 := time.Now() _, err := config.KubeClient.Discovery().ServerVersion() elapsed := time.Since(t1) if err != nil { klog.Errorf("failed to connect to apiserver: %v", err) - SetApiserverUnhealthyMetrics(config.NodeName) + if setMetrics { + SetApiserverUnhealthyMetrics(config.NodeName) + } return err } klog.Infof("connect to apiserver success in %.2fms", float64(elapsed)/float64(time.Millisecond)) - SetApiserverHealthyMetrics(config.NodeName, float64(elapsed)/float64(time.Millisecond)) + if setMetrics { + SetApiserverHealthyMetrics(config.NodeName, float64(elapsed)/float64(time.Millisecond)) + } return nil }