Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

underlay: fix link name exchange #2516

Merged
merged 6 commits into from
Mar 22, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions dist/images/start-ovs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ function exchange_link_names() {
ip link set $br down
ip link set $br name $port
ip link set $port up
# wait for systemd-networkd to finish the interface configuration
sleep 0.1

# transfer IPv4 routes
default_ipv4_routes=()
Expand Down
37 changes: 13 additions & 24 deletions pkg/daemon/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,34 +148,11 @@ func ovsCleanProviderNetwork(provider string) error {
for idx, m = range brMappings {
if strings.HasPrefix(m, mappingPrefix) {
brName = m[len(mappingPrefix):]
klog.V(3).Infof("found bridge name for provider %s: %s", provider, brName)
break
}
}

if output, err = ovs.Exec("list-br"); err != nil {
return fmt.Errorf("failed to list OVS bridge %v: %q", err, output)
}

if !util.ContainsString(strings.Split(output, "\n"), brName) {
return nil
}

// get host nic
if output, err = ovs.Exec("list-ports", brName); err != nil {
return fmt.Errorf("failed to list ports of OVS bridge %s, %v: %q", brName, err, output)
}

// remove host nic from the external bridge
if output != "" {
for _, port := range strings.Split(output, "\n") {
if err = removeProviderNic(port, brName); err != nil {
errMsg := fmt.Errorf("failed to remove port %s from external bridge %s: %v", port, brName, err)
klog.Error(errMsg)
return errMsg
}
}
}

if idx != len(brMappings) {
brMappings = append(brMappings[:idx], brMappings[idx+1:]...)
if len(brMappings) == 0 {
Expand Down Expand Up @@ -207,6 +184,15 @@ func ovsCleanProviderNetwork(provider string) error {
return fmt.Errorf("failed to set ovn-chassis-mac-mappings, %v: %q", err, output)
}

if output, err = ovs.Exec("list-br"); err != nil {
return fmt.Errorf("failed to list OVS bridge %v: %q", err, output)
}

if !util.ContainsString(strings.Split(output, "\n"), brName) {
klog.V(3).Infof("ovs bridge %s not found", brName)
return nil
}

// get host nic
if output, err = ovs.Exec("list-ports", brName); err != nil {
return fmt.Errorf("failed to list ports of OVS bridge %s, %v: %q", brName, err, output)
Expand All @@ -215,11 +201,13 @@ func ovsCleanProviderNetwork(provider string) error {
// remove host nic from the external bridge
if output != "" {
for _, port := range strings.Split(output, "\n") {
klog.V(3).Infof("removing ovs port %s from bridge %s", port, brName)
if err = removeProviderNic(port, brName); err != nil {
errMsg := fmt.Errorf("failed to remove port %s from external bridge %s: %v", port, brName, err)
klog.Error(errMsg)
return errMsg
}
klog.V(3).Infof("ovs port %s has been removed from bridge %s", port, brName)
}
}

Expand All @@ -228,6 +216,7 @@ func ovsCleanProviderNetwork(provider string) error {
if output, err = ovs.Exec(ovs.IfExists, "del-br", brName); err != nil {
return fmt.Errorf("failed to remove OVS bridge %s, %v: %q", brName, err, output)
}
klog.V(3).Infof("ovs bridge %s has been deleted", brName)

if br := util.ExternalBridgeName(provider); br != brName {
if _, err = changeProvideNicName(br, brName); err != nil {
Expand Down
53 changes: 46 additions & 7 deletions pkg/daemon/init_linux.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package daemon

import (
"syscall"
"time"

"k8s.io/klog/v2"

Expand Down Expand Up @@ -45,6 +45,39 @@ func nmSetManaged(device string, managed bool) error {
return nil
}

// waitNetworkdConfiguration waits for systemd-networkd to finish configuring
// the link identified by linkIndex.
//
// It subscribes to kernel route update events and returns once no route event
// has been seen on the link for 50ms; every route event on the link restarts
// that quiet period. If the subscription cannot be established, it logs a
// warning and falls back to a fixed 100ms sleep.
func waitNetworkdConfiguration(linkIndex int) {
	// quietPeriod is how long the link must stay free of route events before
	// its configuration is considered complete.
	const quietPeriod = 50 * time.Millisecond

	done := make(chan struct{})
	ch := make(chan netlink.RouteUpdate)
	if err := netlink.RouteSubscribe(ch, done); err != nil {
		klog.Warningf("failed to subscribe route update events: %v", err)
		klog.Info("Waiting 100ms ...")
		time.Sleep(100 * time.Millisecond)
		return
	}
	defer func() {
		// Closing done (instead of sending on it) can never block, no matter
		// how many receivers the subscription has or whether it has already
		// terminated.
		close(done)
		// The subscription may be blocked sending an event on the unbuffered
		// channel; keep draining so it can observe the shutdown and exit.
		// NOTE(review): relies on the netlink library closing ch once the
		// subscription ends - confirm against the vendored version.
		go func() {
			for range ch {
			}
		}()
	}()

	// events is a receive-only view of ch that is set to nil once ch is
	// closed, which disables its select case instead of busy-looping on
	// zero-value events.
	var events <-chan netlink.RouteUpdate = ch

	timer := time.NewTimer(quietPeriod)
	defer timer.Stop()
	for {
		select {
		case <-timer.C:
			// timeout, interface configuration is expected to be completed
			return
		case event, ok := <-events:
			if !ok {
				// subscription terminated; just wait for the timer to fire
				events = nil
				continue
			}
			if event.LinkIndex != linkIndex {
				continue
			}
			// received a route event on the link: restart the quiet period.
			// Drain without blocking so this is safe whether or not the
			// expired timer's value is still pending in the channel.
			if !timer.Stop() {
				select {
				case <-timer.C:
				default:
				}
			}
			timer.Reset(quietPeriod)
		}
	}
}

func changeProvideNicName(current, target string) (bool, error) {
link, err := netlink.LinkByName(current)
if err != nil {
Expand All @@ -56,17 +89,17 @@ func changeProvideNicName(current, target string) (bool, error) {
return false, err
}
if link.Type() == "openvswitch" {
klog.Infof("%s is an openvswitch interface, skip", current)
klog.V(3).Infof("%s is an openvswitch interface, skip", current)
return true, nil
}

// set link unmanaged by NetworkManager to avoid getting new IP by DHCP
// set link unmanaged by NetworkManager
if err = nmSetManaged(current, false); err != nil {
klog.Errorf("failed set device %s to unmanaged by NetworkManager: %v", current, err)
return false, err
}

klog.Infof("change nic name from %s to %s", current, target)
klog.Infof("renaming link %s as %s", current, target)
addresses, err := netlink.AddrList(link, netlink.FAMILY_ALL)
if err != nil {
klog.Errorf("failed to list addresses of link %s: %v", current, err)
Expand All @@ -90,15 +123,20 @@ func changeProvideNicName(current, target string) (bool, error) {
klog.Errorf("failed to set link %s up: %v", target, err)
return false, err
}
klog.Infof("link %s has been renamed as %s", current, target)

waitNetworkdConfiguration(link.Attrs().Index)

for _, addr := range addresses {
if addr.IP.IsLinkLocalUnicast() {
continue
}
addr.Label = ""
if err = netlink.AddrReplace(link, &addr); err != nil {
klog.Errorf("failed to replace address %s: %v", addr.String(), err)
klog.Errorf("failed to replace address %q: %v", addr.String(), err)
return false, err
}
klog.Infof("address %q has been added/replaced to link %s", addr.String(), target)
}

for _, scope := range routeScopeOrders {
Expand All @@ -107,10 +145,11 @@ func changeProvideNicName(current, target string) (bool, error) {
continue
}
if route.Scope == scope {
if err = netlink.RouteReplace(&route); err != nil && err != syscall.EEXIST {
klog.Errorf("failed to replace route %s: %v", route.String(), err)
if err = netlink.RouteReplace(&route); err != nil {
klog.Errorf("failed to replace route %q to %s: %v", route.String(), target, err)
return false, err
}
klog.Infof("route %q has been added/replaced to link %s", route.String(), target)
}
}
}
Expand Down
17 changes: 8 additions & 9 deletions pkg/daemon/ovs.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,20 +88,19 @@ func configExternalBridge(provider, bridge, nic string, exchangeLinkName, macLea
if err != nil {
return fmt.Errorf("failed to check OVS bridge existence: %v", err)
}
output, err := ovs.Exec(ovs.MayExist, "add-br", bridge,
cmd := []string{
ovs.MayExist, "add-br", bridge,
"--", "set", "bridge", bridge, fmt.Sprintf("other_config:mac-learning-fallback=%v", macLearningFallback),
"--", "set", "bridge", bridge, "external_ids:vendor="+util.CniTypeName,
"--", "set", "bridge", bridge, "external_ids:vendor=" + util.CniTypeName,
"--", "set", "bridge", bridge, fmt.Sprintf("external_ids:exchange-link-name=%v", exchangeLinkName),
)
if err != nil {
return fmt.Errorf("failed to create OVS bridge %s, %v: %q", bridge, err, output)
}
if !brExists {
// assign a new generated mac address only when the bridge is newly created
output, err = ovs.Exec("set", "bridge", bridge, fmt.Sprintf(`other-config:hwaddr="%s"`, util.GenerateMac()))
if err != nil {
return fmt.Errorf("failed to set hwaddr of OVS bridge %s, %v: %q", bridge, err, output)
}
cmd = append(cmd, "--", "set", "bridge", bridge, fmt.Sprintf(`other-config:hwaddr="%s"`, util.GenerateMac()))
}
output, err := ovs.Exec(cmd...)
if err != nil {
return fmt.Errorf("failed to create OVS bridge %s, %v: %q", bridge, err, output)
}
if output, err = ovs.Exec("list-ports", bridge); err != nil {
return fmt.Errorf("failed to list ports of OVS bridge %s, %v: %q", bridge, err, output)
Expand Down
57 changes: 28 additions & 29 deletions pkg/daemon/ovs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"net"
"os"
Expand All @@ -13,7 +12,6 @@ import (
"path/filepath"
"regexp"
"strings"
"syscall"
"time"

"github.com/Mellanox/sriovnet"
Expand Down Expand Up @@ -906,35 +904,30 @@ func configProviderNic(nicName, brName string) (int, error) {
return 0, fmt.Errorf("failed to get routes on nic %s: %v", nicName, err)
}

// set link unmanaged by NetworkManager
if err = nmSetManaged(nicName, false); err != nil {
klog.Errorf("failed set device %s to unmanaged by NetworkManager: %v", nicName, err)
return 0, err
}

for _, addr := range addrs {
if addr.IP.IsLinkLocalUnicast() {
// skip 169.254.0.0/16 and fe80::/10
continue
}

if !strings.HasPrefix(addr.Label, nicName) {
if strings.HasPrefix(addr.Label, brName) {
addr.Label = nicName + addr.Label[len(brName):]
} else {
addr.Label = nicName
}
}
if err = netlink.AddrDel(nic, &addr); err != nil {
errMsg := fmt.Errorf("failed to delete address %q on nic %s: %v", addr.String(), nicName, err)
if errors.Is(err, syscall.EADDRNOTAVAIL) {
// the IP address does not exist now
klog.Warning(errMsg)
continue
}
klog.Error(errMsg)
return 0, errMsg
}
klog.Infof("address %q has been removed from link %s", addr.String(), nicName)

if addr.Label != "" {
addr.Label = brName + addr.Label[len(nicName):]
}
addr.Label = ""
if err = netlink.AddrReplace(bridge, &addr); err != nil {
return 0, fmt.Errorf("failed to replace address %q on OVS bridge %s: %v", addr.String(), brName, err)
}
klog.Infof("address %q has been added/replaced to link %s", addr.String(), brName)
}

// keep mac address the same with the provider nic,
Expand Down Expand Up @@ -964,6 +957,7 @@ func configProviderNic(nicName, brName string) (int, error) {
if err = netlink.RouteReplace(&route); err != nil {
return 0, fmt.Errorf("failed to add/replace route %s: %v", route.String(), err)
}
klog.Infof("route %q has been added/replaced to link %s", route.String(), brName)
}
}
}
Expand All @@ -972,6 +966,7 @@ func configProviderNic(nicName, brName string) (int, error) {
"--", "set", "port", nicName, "external_ids:vendor="+util.CniTypeName); err != nil {
return 0, fmt.Errorf("failed to add %s to OVS bridge %s: %v", nicName, brName, err)
}
klog.V(3).Infof("ovs port %s has been added to bridge %s", nicName, brName)

if err = netlink.LinkSetUp(nic); err != nil {
return 0, fmt.Errorf("failed to set link %s up: %v", nicName, err)
Expand Down Expand Up @@ -1031,6 +1026,7 @@ func removeProviderNic(nicName, brName string) error {
if _, err = ovs.Exec(ovs.IfExists, "del-port", brName, nicName); err != nil {
return fmt.Errorf("failed to remove %s from OVS bridge %s: %v", nicName, brName, err)
}
klog.V(3).Infof("ovs port %s has been removed from bridge %s", nicName, brName)

for _, addr := range addrs {
if addr.IP.IsLinkLocalUnicast() {
Expand All @@ -1039,25 +1035,22 @@ func removeProviderNic(nicName, brName string) error {
}

if err = netlink.AddrDel(bridge, &addr); err != nil {
errMsg := fmt.Errorf("failed to delete address %s on OVS bridge %s: %v", addr.String(), brName, err)
if errors.Is(err, syscall.EADDRNOTAVAIL) {
// the IP address does not exist now
klog.Warning(errMsg)
continue
}
errMsg := fmt.Errorf("failed to delete address %q on OVS bridge %s: %v", addr.String(), brName, err)
klog.Error(errMsg)
return errMsg
}
klog.Infof("address %q has been deleted from link %s", addr.String(), brName)

if addr.Label != "" {
addr.Label = nicName + strings.TrimPrefix(addr.Label, brName)
}
addr.Label = ""
if err = netlink.AddrReplace(nic, &addr); err != nil {
return fmt.Errorf("failed to replace address %s on nic %s: %v", addr.String(), nicName, err)
return fmt.Errorf("failed to replace address %q on nic %s: %v", addr.String(), nicName, err)
}
klog.Infof("address %q has been added/replaced to link %s", addr.String(), nicName)
}

if err = netlink.LinkSetDown(bridge); err != nil {
return fmt.Errorf("failed to set OVS bridge %s down: %v", brName, err)
if err = netlink.LinkSetUp(nic); err != nil {
klog.Error("failed to set link %s up: %v", nicName, err)
return err
}

scopeOrders := [...]netlink.Scope{
Expand All @@ -1077,10 +1070,16 @@ func removeProviderNic(nicName, brName string) error {
if err = netlink.RouteReplace(&route); err != nil {
return fmt.Errorf("failed to add/replace route %s: %v", route.String(), err)
}
klog.Infof("route %q has been added/replaced to link %s", route.String(), nicName)
}
}
}

if err = netlink.LinkSetDown(bridge); err != nil {
return fmt.Errorf("failed to set OVS bridge %s down: %v", brName, err)
}
klog.V(3).Infof("link %s has been set down", brName)

return nil
}

Expand Down