Skip to content

Commit

Permalink
Add new EgressIP config option "egressip-reachability-total-timeout"
Browse files Browse the repository at this point in the history
Users can use this new option to change the node reachability total
timeout from its default of 1 second to a different value.

The option can also be used to disable the reachability check by
specifying a value of 0.

Signed-off-by: Mohamed Mahmoud <mmahmoud@redhat.com>
  • Loading branch information
msherif1234 committed Jun 22, 2022
1 parent b4c8535 commit 9e4358d
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 12 deletions.
19 changes: 15 additions & 4 deletions go-controller/pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,9 @@ var (
Metrics MetricsConfig

// OVNKubernetesFeatureConfig holds OVN-Kubernetes feature enhancement config file parameters and command-line overrides
OVNKubernetesFeature OVNKubernetesFeatureConfig
OVNKubernetesFeature = OVNKubernetesFeatureConfig{
EgressIPReachabiltyTotalTimeout: 1,
}

// OvnNorth holds northbound OVN database client and server authentication and location details
OvnNorth OvnAuthConfig
Expand Down Expand Up @@ -323,9 +325,12 @@ type MetricsConfig struct {

// OVNKubernetesFeatureConfig holds the OVN-Kubernetes feature enhancement settings
// read from the config file, together with their command-line overrides.
type OVNKubernetesFeatureConfig struct {
EnableEgressIP bool `gcfg:"enable-egress-ip"`
EnableEgressFirewall bool `gcfg:"enable-egress-firewall"`
EnableEgressQoS bool `gcfg:"enable-egress-qos"`
// EnableEgressIP reports whether the EgressIP feature is enabled.
EnableEgressIP bool `gcfg:"enable-egress-ip"`
// EgressIPReachabiltyTotalTimeout is the EgressIP node reachability total timeout in seconds.
// The package-level default is 1; a value of 0 disables the reachability check.
// NOTE(review): the identifier is spelled "Reachabilty" (missing an "i"). Renaming it would
// touch every reference to the field, so it is only flagged here.
EgressIPReachabiltyTotalTimeout int `gcfg:"egressip-reachability-total-timeout"`
// EnableEgressFirewall turns on use of the EgressFirewall CRD feature.
EnableEgressFirewall bool `gcfg:"enable-egress-firewall"`
// EnableEgressQoS is set by the enable-egress-qos option; presumably it enables the
// EgressQoS feature — confirm against its consumers.
EnableEgressQoS bool `gcfg:"enable-egress-qos"`
}

// GatewayMode holds the node gateway mode
Expand Down Expand Up @@ -847,6 +852,12 @@ var OVNK8sFeatureFlags = []cli.Flag{
Destination: &cliConfig.OVNKubernetesFeature.EnableEgressIP,
Value: OVNKubernetesFeature.EnableEgressIP,
},
&cli.IntFlag{
Name: "egressip-reachability-total-timeout",
Usage: "EgressIP node reachability total timeout in seconds (default: 1)",
Destination: &cliConfig.OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout,
Value: 1,
},
&cli.BoolFlag{
Name: "enable-egress-firewall",
Usage: "Configure to use EgressFirewall CRD feature with ovn-kubernetes.",
Expand Down
10 changes: 9 additions & 1 deletion go-controller/pkg/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,9 @@ cluster-subnets=11.132.0.0/14/23
[ovnkubenode]
mode=full
[ovnkubernetesfeature]
egressip-reachability-total-timeout=3
`

var newData string
Expand Down Expand Up @@ -292,6 +295,7 @@ var _ = Describe("Config Operations", func() {
gomega.Expect(OvnKubeNode.Mode).To(gomega.Equal(types.NodeModeFull))
gomega.Expect(OvnKubeNode.MgmtPortNetdev).To(gomega.Equal(""))
gomega.Expect(Gateway.RouterSubnet).To(gomega.Equal(""))
gomega.Expect(OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout).To(gomega.Equal(1))

for _, a := range []OvnAuthConfig{OvnNorth, OvnSouth} {
gomega.Expect(a.Scheme).To(gomega.Equal(OvnDBSchemeUnix))
Expand Down Expand Up @@ -594,6 +598,7 @@ var _ = Describe("Config Operations", func() {
gomega.Expect(Gateway.RouterSubnet).To(gomega.Equal("10.50.0.0/16"))

gomega.Expect(HybridOverlay.Enabled).To(gomega.BeTrue())
gomega.Expect(OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout).To(gomega.Equal(3))
gomega.Expect(HybridOverlay.ClusterSubnets).To(gomega.Equal([]CIDRNetworkEntry{
{ovntest.MustParseIPNet("11.132.0.0/14"), 23},
}))
Expand Down Expand Up @@ -673,6 +678,7 @@ var _ = Describe("Config Operations", func() {
gomega.Expect(Gateway.RouterSubnet).To(gomega.Equal("10.55.0.0/16"))

gomega.Expect(HybridOverlay.Enabled).To(gomega.BeTrue())
gomega.Expect(OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout).To(gomega.Equal(5))
gomega.Expect(HybridOverlay.ClusterSubnets).To(gomega.Equal([]CIDRNetworkEntry{
{ovntest.MustParseIPNet("11.132.0.0/14"), 23},
}))
Expand Down Expand Up @@ -727,6 +733,7 @@ var _ = Describe("Config Operations", func() {
"-metrics-enable-pprof=false",
"-ofctrl-wait-before-clear=5000",
"-metrics-enable-config-duration=true",
"-egressip-reachability-total-timeout=5",
}
err = app.Run(cliArgs)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
Expand Down Expand Up @@ -1062,7 +1069,7 @@ enable-pprof=true
gomega.Expect(OvnSouth.Address).To(
gomega.Equal("ssl:6.5.4.1:6652,ssl:6.5.4.2:6652,ssl:6.5.4.3:6652"))
gomega.Expect(OvnSouth.CertCommonName).To(gomega.Equal("testsbcommonname"))

gomega.Expect(OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout).To(gomega.Equal(3))
return nil
}
cliArgs := []string{
Expand Down Expand Up @@ -1098,6 +1105,7 @@ enable-pprof=true
"-sb-client-cert=/client/cert2",
"-sb-client-cacert=/client/cacert2",
"-sb-cert-common-name=testsbcommonname",
"-egressip-reachability-total-timeout=3",
}
err = app.Run(cliArgs)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
Expand Down
43 changes: 37 additions & 6 deletions go-controller/pkg/ovn/egressip.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ import (
)

// egressIPDialer is the probe used to decide whether an egress node's IP is reachable.
// It is an interface so that the package-level dialer can be replaced by another
// implementation.
type egressIPDialer interface {
dial(ip net.IP) bool
// dial probes ip and reports whether it is considered reachable; timeout is the
// time limit for that single attempt.
dial(ip net.IP, timeout time.Duration) bool
}

var dialer egressIPDialer = &egressIPDial{}
Expand Down Expand Up @@ -1835,6 +1835,8 @@ type egressIPController struct {
nbClient libovsdbclient.Client
// watchFactory watching k8s objects
watchFactory *factory.WatchFactory
// EgressIP Node reachability total timeout configuration
egressIPTotalTimeout int
}

// addPodEgressIPAssignment will program OVN with logical router policies
Expand Down Expand Up @@ -2099,11 +2101,41 @@ func (oc *Controller) checkEgressNodesReachability() {
}

// isReachable reports whether node answers a dial probe on at least one of its
// management IPs within the configured EgressIP reachability total timeout
// (oc.eIPC.egressIPTotalTimeout, in seconds).
//
// It returns false immediately when the node has no management IPs. A timeout of 0
// disables the check: the node is reported reachable without dialing. Otherwise every
// management IP is dialed in rounds, with a 100ms sleep between rounds, until one dial
// succeeds or the deadline passes; on failure an error is logged and false is returned.
//
// NOTE(review): a negative timeout puts endTime in the past, so the probe loop never
// runs and every node is reported unreachable — consider validating the option.
// NOTE(review): the per-dial timeouts use integer division, so they truncate to 0 once
// numMgmtIPs exceeds the numerator (700, 100 or 900); net.DialTimeout presumably treats
// 0 as "no timeout" — confirm.
func (oc *Controller) isReachable(node *egressNode) bool {
// NOTE(review): the next three lines call dial without a timeout and are never closed;
// they look like the pre-change loop as rendered by the diff view — confirm.
for _, ip := range node.mgmtIPs {
if dialer.dial(ip) {
return true
var retryTimeOut, initialRetryTimeOut time.Duration

numMgmtIPs := len(node.mgmtIPs)
// Nothing to probe: treat the node as unreachable. This runs before the "disabled"
// case below, so a node without management IPs is unreachable even when the check is off.
if numMgmtIPs == 0 {
return false
}

switch oc.eIPC.egressIPTotalTimeout {
// 0 disables the reachability check: report the node as reachable without dialing.
case 0:
return true
case 1:
// One-second budget: the first round gets 700ms split evenly across the management IPs
// and every later round gets 100ms split the same way, which keeps the total wait close
// to the configured value once the 100ms sleep between rounds is included.
initialRetryTimeOut = time.Duration(700/numMgmtIPs) * time.Millisecond
retryTimeOut = time.Duration(100/numMgmtIPs) * time.Millisecond
default:
// Any other value: every round gets 900ms split evenly across the management IPs, so a
// round in which every dial times out lasts roughly one second including the 100ms sleep.
initialRetryTimeOut = time.Duration(900/numMgmtIPs) * time.Millisecond
retryTimeOut = initialRetryTimeOut
}

// The first round uses the initial timeout; every later round uses the retry timeout.
timeout := initialRetryTimeOut
endTime := time.Now().Add(time.Second * time.Duration(oc.eIPC.egressIPTotalTimeout))
// The deadline is only checked between rounds, so a round that starts just before
// endTime may finish after it.
for time.Now().Before(endTime) {
for _, ip := range node.mgmtIPs {
// One successful dial on any management IP is enough.
if dialer.dial(ip, timeout) {
return true
}
}
time.Sleep(100 * time.Millisecond)
timeout = retryTimeOut
}
klog.Errorf("Failed reachability check for %s", node.name)
return false
}

Expand All @@ -2116,8 +2148,7 @@ type egressIPDial struct{}
// we will return false). If the node is online then we presumably will get a "connection
// refused" error; but the code below assumes that anything other than timeout or "no
// route" indicates that the node is online.
func (e *egressIPDial) dial(ip net.IP) bool {
timeout := time.Second
func (e *egressIPDial) dial(ip net.IP, timeout time.Duration) bool {
conn, err := net.DialTimeout("tcp", net.JoinHostPort(ip.String(), "9"), timeout)
if conn != nil {
conn.Close()
Expand Down
2 changes: 1 addition & 1 deletion go-controller/pkg/ovn/egressip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (

// fakeEgressIPDialer is a stand-in dialer for tests; its dial method reports success
// without performing any network I/O.
type fakeEgressIPDialer struct{}

// dial always reports the target as reachable; its arguments are ignored.
func (f fakeEgressIPDialer) dial(ip net.IP) bool {
func (f fakeEgressIPDialer) dial(ip net.IP, timeout time.Duration) bool {
return true
}

Expand Down
4 changes: 4 additions & 0 deletions go-controller/pkg/ovn/ovn.go
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ func NewOvnController(ovnClient *util.OVNClientset, wf *factory.WatchFactory, st
allocator: allocator{&sync.Mutex{}, make(map[string]*egressNode)},
nbClient: libovsdbOvnNBClient,
watchFactory: wf,
egressIPTotalTimeout: config.OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout,
},
loadbalancerClusterCache: make(map[kapi.Protocol]string),
multicastSupport: config.EnableMulticast,
Expand Down Expand Up @@ -394,6 +395,9 @@ func (oc *Controller) Run(ctx context.Context, wg *sync.WaitGroup) error {
return err
}
}
if config.OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout == 0 {
klog.V(2).Infof("EgressIP node reachability check disabled")
}
}

if config.OVNKubernetesFeature.EnableEgressFirewall {
Expand Down

0 comments on commit 9e4358d

Please sign in to comment.