Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug 2099755: Add new EgressIP config option "egressip-reachability-total-timeout" #1156

Merged
merged 1 commit into from
Jun 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 15 additions & 4 deletions go-controller/pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,9 @@ var (
Metrics MetricsConfig

// OVNKubernetesFeatureConfig holds OVN-Kubernetes feature enhancement config file parameters and command-line overrides
OVNKubernetesFeature OVNKubernetesFeatureConfig
OVNKubernetesFeature = OVNKubernetesFeatureConfig{
EgressIPReachabiltyTotalTimeout: 1,
}

// OvnNorth holds northbound OVN database client and server authentication and location details
OvnNorth OvnAuthConfig
Expand Down Expand Up @@ -323,9 +325,12 @@ type MetricsConfig struct {

// OVNKubernetesFeatureConfig holds OVN-Kubernetes feature enhancement config file parameters and command-line overrides.
// The gcfg struct tag on each field names the matching config file option.
type OVNKubernetesFeatureConfig struct {
// NOTE(review): the next three fields are declared again further down in this struct; they look like
// the pre-change lines of the diff this text was taken from — confirm before relying on them.
EnableEgressIP bool `gcfg:"enable-egress-ip"`
EnableEgressFirewall bool `gcfg:"enable-egress-firewall"`
EnableEgressQoS bool `gcfg:"enable-egress-qos"`
// EgressIP feature is enabled
EnableEgressIP bool `gcfg:"enable-egress-ip"`
// EgressIP node reachability total timeout in seconds.
// The package-level default is 1; a value of 0 disables the node reachability check.
EgressIPReachabiltyTotalTimeout int `gcfg:"egressip-reachability-total-timeout"`
// EgressFirewall CRD feature is enabled
EnableEgressFirewall bool `gcfg:"enable-egress-firewall"`
// Presumably enables the EgressQoS feature; only the option name is visible here — TODO confirm
EnableEgressQoS bool `gcfg:"enable-egress-qos"`
}

// GatewayMode holds the node gateway mode
Expand Down Expand Up @@ -847,6 +852,12 @@ var OVNK8sFeatureFlags = []cli.Flag{
Destination: &cliConfig.OVNKubernetesFeature.EnableEgressIP,
Value: OVNKubernetesFeature.EnableEgressIP,
},
&cli.IntFlag{
Name: "egressip-reachability-total-timeout",
Usage: "EgressIP node reachability total timeout in seconds (default: 1)",
Destination: &cliConfig.OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout,
Value: 1,
},
&cli.BoolFlag{
Name: "enable-egress-firewall",
Usage: "Configure to use EgressFirewall CRD feature with ovn-kubernetes.",
Expand Down
10 changes: 9 additions & 1 deletion go-controller/pkg/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,9 @@ cluster-subnets=11.132.0.0/14/23

[ovnkubenode]
mode=full

[ovnkubernetesfeature]
egressip-reachability-total-timeout=3
`

var newData string
Expand Down Expand Up @@ -292,6 +295,7 @@ var _ = Describe("Config Operations", func() {
gomega.Expect(OvnKubeNode.Mode).To(gomega.Equal(types.NodeModeFull))
gomega.Expect(OvnKubeNode.MgmtPortNetdev).To(gomega.Equal(""))
gomega.Expect(Gateway.RouterSubnet).To(gomega.Equal(""))
gomega.Expect(OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout).To(gomega.Equal(1))

for _, a := range []OvnAuthConfig{OvnNorth, OvnSouth} {
gomega.Expect(a.Scheme).To(gomega.Equal(OvnDBSchemeUnix))
Expand Down Expand Up @@ -594,6 +598,7 @@ var _ = Describe("Config Operations", func() {
gomega.Expect(Gateway.RouterSubnet).To(gomega.Equal("10.50.0.0/16"))

gomega.Expect(HybridOverlay.Enabled).To(gomega.BeTrue())
gomega.Expect(OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout).To(gomega.Equal(3))
gomega.Expect(HybridOverlay.ClusterSubnets).To(gomega.Equal([]CIDRNetworkEntry{
{ovntest.MustParseIPNet("11.132.0.0/14"), 23},
}))
Expand Down Expand Up @@ -673,6 +678,7 @@ var _ = Describe("Config Operations", func() {
gomega.Expect(Gateway.RouterSubnet).To(gomega.Equal("10.55.0.0/16"))

gomega.Expect(HybridOverlay.Enabled).To(gomega.BeTrue())
gomega.Expect(OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout).To(gomega.Equal(5))
gomega.Expect(HybridOverlay.ClusterSubnets).To(gomega.Equal([]CIDRNetworkEntry{
{ovntest.MustParseIPNet("11.132.0.0/14"), 23},
}))
Expand Down Expand Up @@ -727,6 +733,7 @@ var _ = Describe("Config Operations", func() {
"-metrics-enable-pprof=false",
"-ofctrl-wait-before-clear=5000",
"-metrics-enable-config-duration=true",
"-egressip-reachability-total-timeout=5",
}
err = app.Run(cliArgs)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
Expand Down Expand Up @@ -1062,7 +1069,7 @@ enable-pprof=true
gomega.Expect(OvnSouth.Address).To(
gomega.Equal("ssl:6.5.4.1:6652,ssl:6.5.4.2:6652,ssl:6.5.4.3:6652"))
gomega.Expect(OvnSouth.CertCommonName).To(gomega.Equal("testsbcommonname"))

gomega.Expect(OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout).To(gomega.Equal(3))
return nil
}
cliArgs := []string{
Expand Down Expand Up @@ -1098,6 +1105,7 @@ enable-pprof=true
"-sb-client-cert=/client/cert2",
"-sb-client-cacert=/client/cacert2",
"-sb-cert-common-name=testsbcommonname",
"-egressip-reachability-total-timeout=3",
}
err = app.Run(cliArgs)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
Expand Down
43 changes: 37 additions & 6 deletions go-controller/pkg/ovn/egressip.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ import (
)

// egressIPDialer is the probe used to decide whether an egress node IP is reachable.
// The package-level dialer variable holds the implementation in use.
type egressIPDialer interface {
// NOTE(review): dial is listed twice; the form without a timeout looks like the pre-change
// line of the diff this text was taken from — confirm.
dial(ip net.IP) bool
// dial reports whether ip should be treated as reachable; timeout is the time allowed for the attempt.
dial(ip net.IP, timeout time.Duration) bool
}

var dialer egressIPDialer = &egressIPDial{}
Expand Down Expand Up @@ -1839,6 +1839,8 @@ type egressIPController struct {
nbClient libovsdbclient.Client
// watchFactory watching k8s objects
watchFactory *factory.WatchFactory
// EgressIP Node reachability total timeout configuration
egressIPTotalTimeout int
}

// addPodEgressIPAssignment will program OVN with logical router policies
Expand Down Expand Up @@ -2108,11 +2110,41 @@ func (oc *Controller) checkEgressNodesReachability() {
}

// isReachable reports whether the given egress node answers a dial on any of its
// management IPs within the configured EgressIP reachability total timeout
// (oc.eIPC.egressIPTotalTimeout, in seconds).
//
// A node without management IPs is reported unreachable. A configured timeout of 0
// skips the check and reports the node reachable. Otherwise every management IP is
// dialed in turn, with a 100 msec sleep after each round, until one dial succeeds or
// the total timeout has elapsed; on failure an error is logged and false is returned.
func (oc *Controller) isReachable(node *egressNode) bool {
// NOTE(review): the next three lines call dial without a timeout and are never closed; they
// look like the pre-change loop from the diff this text was taken from — confirm.
for _, ip := range node.mgmtIPs {
if dialer.dial(ip) {
return true
var retryTimeOut, initialRetryTimeOut time.Duration

// NOTE(review): this check runs before the "0 = disabled" case below, so a node without
// management IPs is reported unreachable even when the check is disabled — confirm intended.
numMgmtIPs := len(node.mgmtIPs)
if numMgmtIPs == 0 {
return false
}

// The per-dial timeouts below use integer division, so the per-IP value truncates and
// becomes 0 once numMgmtIPs exceeds the dividend.
// NOTE(review): confirm how the dialer treats a zero timeout.
switch oc.eIPC.egressIPTotalTimeout {
// A total timeout of 0 means the node reachability check is disabled.
case 0:
return true
case 1:
// One-second budget: the first round gives each IP 700/numMgmtIPs msec and later rounds
// 100/numMgmtIPs msec, so that dial time plus the 100 msec sleep between attempts
// stays in range with the configured value.
initialRetryTimeOut = time.Duration(700/numMgmtIPs) * time.Millisecond
retryTimeOut = time.Duration(100/numMgmtIPs) * time.Millisecond
default:
// Every round gives each IP 900/numMgmtIPs msec, so that one round plus the 100 msec
// sleep between attempts stays in range with one second of the configured value.
// NOTE(review): a negative configured value also lands here; endTime is then not in the
// future, the loop below never runs and the node is reported unreachable — confirm.
initialRetryTimeOut = time.Duration(900/numMgmtIPs) * time.Millisecond
retryTimeOut = initialRetryTimeOut
}

// The first round uses the initial timeout; every later round uses retryTimeOut.
timeout := initialRetryTimeOut
endTime := time.Now().Add(time.Second * time.Duration(oc.eIPC.egressIPTotalTimeout))
for time.Now().Before(endTime) {
for _, ip := range node.mgmtIPs {
if dialer.dial(ip, timeout) {
return true
}
}
// Pause between rounds; the deadline is only re-checked after this sleep.
time.Sleep(100 * time.Millisecond)
timeout = retryTimeOut
}
klog.Errorf("Failed reachability check for %s", node.name)
return false
}

Expand All @@ -2125,8 +2157,7 @@ type egressIPDial struct{}
// we will return false). If the node is online then we presumably will get a "connection
// refused" error; but the code below assumes that anything other than timeout or "no
// route" indicates that the node is online.
func (e *egressIPDial) dial(ip net.IP) bool {
timeout := time.Second
func (e *egressIPDial) dial(ip net.IP, timeout time.Duration) bool {
conn, err := net.DialTimeout("tcp", net.JoinHostPort(ip.String(), "9"), timeout)
if conn != nil {
conn.Close()
Expand Down
2 changes: 1 addition & 1 deletion go-controller/pkg/ovn/egressip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (

// fakeEgressIPDialer is a test stand-in dialer whose dial always reports success.
type fakeEgressIPDialer struct{}

// NOTE(review): two dial signatures follow; the one without a timeout looks like the
// pre-change line of the diff this text was taken from — confirm.
func (f fakeEgressIPDialer) dial(ip net.IP) bool {
// dial ignores both arguments and always returns true.
func (f fakeEgressIPDialer) dial(ip net.IP, timeout time.Duration) bool {
return true
}

Expand Down
4 changes: 4 additions & 0 deletions go-controller/pkg/ovn/ovn.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,7 @@ func NewOvnController(ovnClient *util.OVNClientset, wf *factory.WatchFactory, st
allocator: allocator{&sync.Mutex{}, make(map[string]*egressNode)},
nbClient: libovsdbOvnNBClient,
watchFactory: wf,
egressIPTotalTimeout: config.OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout,
},
loadbalancerClusterCache: make(map[kapi.Protocol]string),
multicastSupport: config.EnableMulticast,
Expand Down Expand Up @@ -397,6 +398,9 @@ func (oc *Controller) Run(ctx context.Context, wg *sync.WaitGroup) error {
return err
}
}
if config.OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout == 0 {
klog.V(2).Infof("EgressIP node reachability check disabled")
}
}

if config.OVNKubernetesFeature.EnableEgressFirewall {
Expand Down