Skip to content

Commit

Permalink
Merge pull request #353 from alexanderConstantinescu/bug/2004720
Browse files: browse the repository at this point in the history
[release-4.6] Bug 2004720: improve SDN's OVS healthcheck and logging
  • Loading branch information
openshift-merge-robot committed Sep 16, 2021
2 parents 59096ce + 9a4ade1 commit bc715d1
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 112 deletions.
103 changes: 0 additions & 103 deletions pkg/network/node/healthcheck.go

This file was deleted.

36 changes: 36 additions & 0 deletions pkg/network/node/healthcheck_ovs.go
@@ -0,0 +1,36 @@
package node

import (
"fmt"
"os"
"time"

"github.com/openshift/sdn/pkg/network/node/ovs/ovsclient"
"k8s.io/klog"
)

// Default endpoint used when dialing OVS; healthCheckOVS passes these two
// values, in this order, to ovsclient.DialTimeout.
const (
// ovsDialDefaultNetwork is the network argument for the dial call.
// NOTE(review): "unix" presumably selects a Unix domain socket — confirm
// against ovsclient.DialTimeout.
ovsDialDefaultNetwork = "unix"
// ovsDialDefaultAddress is the address argument for the dial call: the
// filesystem path of the OVS db.sock socket.
ovsDialDefaultAddress = "/var/run/openvswitch/db.sock"
)

// healthCheckOVS checks that OVS is reachable and then watches the connection
// in the background.
//
// It dials ovsDialDefaultAddress over ovsDialDefaultNetwork with a one-minute
// timeout and pings the server. If the dial fails, or the ping fails, a
// descriptive error is returned; on a ping failure the connection is closed
// first and any close error is folded into the returned error.
//
// On success it returns nil and leaves a goroutine running that blocks in
// WaitForDisconnect. When that call returns, the goroutine logs the event and
// terminates the whole process with exit code 1. The connection is
// intentionally kept open for that goroutine and is never closed on the
// success path.
func healthCheckOVS() error {
	klog.Infof("Starting OVS health check")
	c, err := ovsclient.DialTimeout(ovsDialDefaultNetwork, ovsDialDefaultAddress, time.Minute)
	if err != nil {
		return fmt.Errorf("Error connecting to OVS: %v", err)
	}
	if err := c.Ping(); err != nil {
		// The connection is unusable; release it before reporting, and
		// surface a close failure alongside the ping failure.
		if cErr := c.Close(); cErr != nil {
			return fmt.Errorf("Error pinging OVS, err: %v, and closing the connection, err: %v", err, cErr)
		}
		return fmt.Errorf("Error pinging OVS, err: %v", err)
	}
	go func() {
		klog.Infof("Starting SDN-OVS disconnection go-routine")
		c.WaitForDisconnect()
		klog.Errorf("Detected OVS server change, restarting")
		// os.Exit terminates immediately without running deferred calls,
		// so flush pending log I/O explicitly; otherwise the message above
		// can be lost when klog is buffering output.
		klog.Flush()
		os.Exit(1)
	}()
	return nil
}
7 changes: 4 additions & 3 deletions pkg/network/node/ovs/ovs.go
Expand Up @@ -106,10 +106,11 @@ const (
OVS_VSCTL = "ovs-vsctl"
)

// ~0.05 seconds in total
var ovsBackoff utilwait.Backoff = utilwait.Backoff{
Duration: 500 * time.Millisecond,
Duration: 10 * time.Millisecond,
Factor: 1.25,
Steps: 10,
Steps: 4,
}

// ovsExec implements ovs.Interface via calls to ovs-ofctl and ovs-vsctl
Expand Down Expand Up @@ -155,7 +156,7 @@ func (ovsif *ovsExec) execWithStdin(cmd string, stdinArgs []string, args ...stri

output, err := kcmd.CombinedOutput()
if err != nil {
klog.V(2).Infof("Error executing %s: %s", cmd, string(output))
klog.Errorf("Error executing cmd: %s with args: %v, output: \n%s", cmd, args, string(output))
return "", err
}

Expand Down
7 changes: 1 addition & 6 deletions pkg/network/node/sdn_controller.go
Expand Up @@ -123,7 +123,7 @@ func (plugin *OsdnNode) SetupSDN() (bool, map[string]podNetworkInfo, error) {

plugin.localGatewayCIDR = fmt.Sprintf("%s/%d", localSubnetGateway, localSubnetMaskLength)

if err := waitForOVS(ovsDialDefaultNetwork, ovsDialDefaultAddress); err != nil {
if err := healthCheckOVS(); err != nil {
return false, nil, err
}

Expand Down Expand Up @@ -151,11 +151,6 @@ func (plugin *OsdnNode) FinishSetupSDN() error {
if err != nil {
return err
}

// TODO: make it possible to safely reestablish node configuration after restart
// If OVS goes down and fails the health check, restart the entire process
runOVSHealthCheck(ovsDialDefaultNetwork, ovsDialDefaultAddress, plugin.alreadySetUp)

return nil
}

Expand Down

0 comments on commit bc715d1

Please sign in to comment.