Skip to content

Commit

Permalink
feat: optimize log
Browse files Browse the repository at this point in the history
1. add file log to survive pod restart
2. add klog metrics
3. mute some error log
  • Loading branch information
oilbeater committed Sep 30, 2021
1 parent 09980b7 commit d384232
Show file tree
Hide file tree
Showing 11 changed files with 88 additions and 14 deletions.
2 changes: 2 additions & 0 deletions cmd/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package controller
import (
"context"
"fmt"
"github.com/kubeovn/kube-ovn/pkg/util"
"net/http"
_ "net/http/pprof" // #nosec
"os"
Expand All @@ -27,6 +28,7 @@ func CmdMain() {

controller.InitClientGoMetrics()
controller.InitWorkQueueMetrics()
util.InitKlogMetrics()
config, err := controller.ParseFlags()
if err != nil {
klog.Fatalf("parse config failed %v", err)
Expand Down
1 change: 1 addition & 0 deletions cmd/daemon/cniserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ func CmdMain() {

klog.Infof(versions.String())
daemon.InitMetrics()
util.InitKlogMetrics()
if err := daemon.InitOVSBridges(); err != nil {
klog.Fatalf("failed to initialize OVS bridges: %v", err)
}
Expand Down
2 changes: 2 additions & 0 deletions cmd/pinger/pinger.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package pinger

import (
"fmt"
"github.com/kubeovn/kube-ovn/pkg/util"
"net/http"
_ "net/http/pprof" // #nosec

Expand All @@ -17,6 +18,7 @@ func CmdMain() {

klog.Infof(versions.String())
pinger.InitPingerMetrics()
util.InitKlogMetrics()
config, err := pinger.ParseFlags()
if err != nil {
klog.Fatalf("parse config failed %v", err)
Expand Down
30 changes: 29 additions & 1 deletion dist/images/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1844,6 +1844,9 @@ spec:
- --enable-lb=$ENABLE_LB
- --enable-np=$ENABLE_NP
- --enable-external-vpc=$ENABLE_EXTERNAL_VPC
- --logtostderr=false
- --alsologtostderr=true
- --log_file=/var/log/kube-ovn/kube-ovn-controller.log
env:
- name: ENABLE_SSL
value: "$ENABLE_SSL"
Expand All @@ -1864,6 +1867,8 @@ spec:
volumeMounts:
- mountPath: /etc/localtime
name: localtime
- mountPath: /var/log/kube-ovn
name: kube-ovn-log
- mountPath: /var/run/tls
name: kube-ovn-tls
readinessProbe:
Expand Down Expand Up @@ -1895,6 +1900,9 @@ spec:
- name: localtime
hostPath:
path: /etc/localtime
- name: kube-ovn-log
hostPath:
path: /var/log/kube-ovn
- name: kube-ovn-tls
secret:
optional: true
Expand Down Expand Up @@ -1951,6 +1959,9 @@ spec:
- --iface=${IFACE}
- --network-type=$NETWORK_TYPE
- --default-interface-name=$VLAN_INTERFACE_NAME
- --logtostderr=false
- --alsologtostderr=true
- --log_file=/var/log/kube-ovn/kube-ovn-cni.log
securityContext:
runAsUser: 0
privileged: true
Expand All @@ -1977,6 +1988,8 @@ spec:
- mountPath: /var/run/netns
name: host-ns
mountPropagation: HostToContainer
- mountPath: /var/log/kube-ovn
name: kube-ovn-log
- mountPath: /etc/localtime
name: localtime
readinessProbe:
Expand Down Expand Up @@ -2027,6 +2040,9 @@ spec:
- name: host-ns
hostPath:
path: /var/run/netns
- name: kube-ovn-log
hostPath:
path: /var/log/kube-ovn
- name: localtime
hostPath:
path: /etc/localtime
Expand Down Expand Up @@ -2060,7 +2076,14 @@ spec:
containers:
- name: pinger
image: "$REGISTRY/kube-ovn:$VERSION"
command: ["/kube-ovn/kube-ovn-pinger", "--external-address=$PINGER_EXTERNAL_ADDRESS", "--external-dns=$PINGER_EXTERNAL_DOMAIN"]
command:
- /kube-ovn/kube-ovn-pinger
args:
- --external-address=$PINGER_EXTERNAL_ADDRESS
- --external-dns=$PINGER_EXTERNAL_DOMAIN
- --logtostderr=false
- --alsologtostderr=true
- --log_file=/var/log/kube-ovn/kube-ovn-pinger.log
imagePullPolicy: $IMAGE_PULL_POLICY
securityContext:
runAsUser: 0
Expand Down Expand Up @@ -2103,6 +2126,8 @@ spec:
name: host-log-ovs
- mountPath: /var/log/ovn
name: host-log-ovn
- mountPath: /var/log/kube-ovn
name: kube-ovn-log
- mountPath: /etc/localtime
name: localtime
- mountPath: /var/run/tls
Expand Down Expand Up @@ -2135,6 +2160,9 @@ spec:
- name: host-log-ovs
hostPath:
path: /var/log/openvswitch
- name: kube-ovn-log
hostPath:
path: /var/log/kube-ovn
- name: host-log-ovn
hostPath:
path: /var/log/ovn
Expand Down
7 changes: 3 additions & 4 deletions pkg/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,7 @@ func (c *Controller) startWorkers(stopCh <-chan struct{}) {
// add default/join subnet and wait them ready
go wait.Until(c.runAddSubnetWorker, time.Second, stopCh)
go wait.Until(c.runAddVlanWorker, time.Second, stopCh)
go wait.Until(c.runAddNamespaceWorker, time.Second, stopCh)
for {
klog.Infof("wait for %s and %s ready", c.config.DefaultLogicalSwitch, c.config.NodeSwitch)
time.Sleep(3 * time.Second)
Expand All @@ -479,7 +480,7 @@ func (c *Controller) startWorkers(stopCh <-chan struct{}) {
klog.Fatalf("failed to list logical switch: %v", err)
}

if util.IsStringIn(c.config.DefaultLogicalSwitch, lss) && util.IsStringIn(c.config.NodeSwitch, lss) {
if util.IsStringIn(c.config.DefaultLogicalSwitch, lss) && util.IsStringIn(c.config.NodeSwitch, lss) && c.addNamespaceQueue.Len() == 0 {
break
}
}
Expand Down Expand Up @@ -513,9 +514,6 @@ func (c *Controller) startWorkers(stopCh <-chan struct{}) {
}
}

// run in a single worker to avoid subnet cidr conflict
go wait.Until(c.runAddNamespaceWorker, time.Second, stopCh)

go wait.Until(c.runDelVpcWorker, time.Second, stopCh)
go wait.Until(c.runUpdateVpcStatusWorker, time.Second, stopCh)
go wait.Until(c.runUpdateProviderNetworkWorker, time.Second, stopCh)
Expand All @@ -524,6 +522,7 @@ func (c *Controller) startWorkers(stopCh <-chan struct{}) {
// run in a single worker to avoid delete the last vip, which will lead ovn to delete the loadbalancer
go wait.Until(c.runDeleteServiceWorker, time.Second, stopCh)
}

for i := 0; i < c.config.WorkerNum; i++ {
go wait.Until(c.runAddPodWorker, time.Second, stopCh)
go wait.Until(c.runDeletePodWorker, time.Second, stopCh)
Expand Down
9 changes: 6 additions & 3 deletions pkg/controller/gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,19 +218,22 @@ func (c *Controller) markAndCleanLSP() error {
continue
}
providerName := strings.ReplaceAll(k, util.AllocatedAnnotationSuffix, "")
isProviderovn, err := c.isOVNProvided(providerName, pod)
isProviderOvn, err := c.isOVNProvided(providerName, pod)
if err != nil {
klog.Errorf("determine if provider is ovn failed %v", err)
}
if !isProviderovn {
if !isProviderOvn {
continue
}
ipNames = append(ipNames, ovs.PodNameToPortName(pod.Name, pod.Namespace, providerName))
}
}
for _, node := range nodes {
ipNames = append(ipNames, fmt.Sprintf("node-%s", node.Name))
if node.Annotations[util.AllocatedAnnotation] == "true" {
ipNames = append(ipNames, fmt.Sprintf("node-%s", node.Name))
}
}

lsps, err := c.ovnClient.ListLogicalSwitchPort(c.config.EnableExternalVpc)
if err != nil {
klog.Errorf("failed to list logical switch port, %v", err)
Expand Down
2 changes: 1 addition & 1 deletion pkg/controller/network_policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ func (c *Controller) handleUpdateNp(key string) error {
}

err = c.ovnClient.SetPortsToPortGroup(pgName, ports)
if err != nil {
if err != nil && !strings.Contains(err.Error(), "not found") {
klog.Errorf("failed to set port group, %v", err)
return err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/controller/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -701,7 +701,7 @@ func (c *Controller) checkPodsChangedOnNode(pgName string, ports []string) (bool
portIds := make([]string, 0, len(ports))
for _, port := range ports {
portId, err := c.ovnClient.ConvertLspNameToUuid(port)
if err != nil {
if err != nil && !strings.Contains(err.Error(), "not found") {
klog.Errorf("failed to convert lsp name to uuid, %v", err)
continue
}
Expand Down
6 changes: 3 additions & 3 deletions pkg/controller/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -880,18 +880,18 @@ func (c *Controller) getPodDefaultSubnet(pod *v1.Pod) (*kubeovnv1.Subnet, error)
} else {
ns, err := c.namespacesLister.Get(pod.Namespace)
if err != nil {
klog.Errorf("failed to get namespace %v", err)
klog.Errorf("failed to get namespace %s, %v", pod.Namespace, err)
return nil, err
}
if ns.Annotations == nil {
err = fmt.Errorf("namespace network annotations is nil")
err = fmt.Errorf("namespace %s network annotations is nil", pod.Namespace)
klog.Error(err)
return nil, err
}

subnetName = ns.Annotations[util.LogicalSwitchAnnotation]
if subnetName == "" {
err = fmt.Errorf("namespace default logical switch is not found")
err = fmt.Errorf("namespace %s default logical switch is not found", pod.Namespace)
klog.Error(err)
return nil, err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/daemon/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ func InitNodeGateway(config *Configuration) error {
return err
}
if node.Annotations[util.IpAddressAnnotation] == "" {
klog.Errorf("no ovn0 address for node %s, please check kube-ovn-controller logs", nodeName)
klog.Warningf("no ovn0 address for node %s, please check kube-ovn-controller logs", nodeName)
time.Sleep(3 * time.Second)
continue
}
Expand Down
39 changes: 39 additions & 0 deletions pkg/util/klog_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package util

import (
"github.com/prometheus/client_golang/prometheus"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/klog"
"time"
)

// klogLinesGaugeVec exposes the klog line counters as a gauge partitioned by
// the "level" label. It is refreshed periodically by fetchKlogMetrics.
var klogLinesGaugeVec = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Name: "klog_lines_total",
		Help: "Total number of klog messages.",
	},
	[]string{"level"},
)

// klogBytesGaugeVec exposes the klog byte counters as a gauge partitioned by
// the "level" label. It is refreshed periodically by fetchKlogMetrics.
var klogBytesGaugeVec = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Name: "klog_bytes_total",
		Help: "Total size of klog messages.",
	},
	[]string{"level"},
)

// InitKlogMetrics registers the klog gauges with Prometheus and starts a
// background goroutine that refreshes them from klog.Stats every 5 seconds.
//
// The refresh loop is intentionally never stopped: it runs for the lifetime
// of the process. Call this once per process; registration goes through
// prometheus.MustRegister, which panics if the same collector is registered
// a second time.
func InitKlogMetrics() {
	registerKlogMetrics()
	// wait.NeverStop makes the "run forever" intent explicit instead of
	// relying on a nil channel never becoming ready.
	go wait.Until(fetchKlogMetrics, 5*time.Second, wait.NeverStop)
}

// registerKlogMetrics registers both klog gauge vectors through
// prometheus.MustRegister, lines first and then bytes.
func registerKlogMetrics() {
	prometheus.MustRegister(
		klogLinesGaugeVec,
		klogBytesGaugeVec,
	)
}

func fetchKlogMetrics() {
klogLinesGaugeVec.WithLabelValues("INFO").Set(float64(klog.Stats.Info.Lines()))
klogLinesGaugeVec.WithLabelValues("WARN").Set(float64(klog.Stats.Warning.Lines()))
klogLinesGaugeVec.WithLabelValues("ERROR").Set(float64(klog.Stats.Error.Lines()))

klogBytesGaugeVec.WithLabelValues("INFO").Set(float64(klog.Stats.Info.Bytes()))
klogBytesGaugeVec.WithLabelValues("WARN").Set(float64(klog.Stats.Warning.Bytes()))
klogBytesGaugeVec.WithLabelValues("ERROR").Set(float64(klog.Stats.Error.Bytes()))
}

0 comments on commit d384232

Please sign in to comment.