Skip to content

Commit

Permalink
Merge pull request #3984 from trozet/fix_mac_changing
Browse files Browse the repository at this point in the history
Adds port security for external OVS bridge for OVN MAC address
  • Loading branch information
trozet committed Dec 21, 2023
2 parents f94db89 + 6ed5d48 commit 0bfb6bd
Show file tree
Hide file tree
Showing 17 changed files with 899 additions and 247 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ jobs:
fail-fast: false
matrix:
# Valid options are:
# target: ["shard-conformance", "control-plane", "multi-homing", "multi-node-zones", "node-ip-migration", "compact-mode"]
# target: ["shard-conformance", "control-plane", "multi-homing", "multi-node-zones", "node-ip-mac-migration", "compact-mode"]
# shard-conformance: hybrid-overlay = multicast-enable = emptylb-enable = false
# control-plane: hybrid-overlay = multicast-enable = emptylb-enable = true
# ha: ["HA", "noHA"]
Expand All @@ -395,8 +395,8 @@ jobs:
- {"target": "control-plane", "ha": "noHA", "gateway-mode": "local", "ipfamily": "ipv4", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones"}
- {"target": "control-plane", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "ipv4", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "2br", "ic": "ic-single-node-zones"}
- {"target": "multi-homing", "ha": "noHA", "gateway-mode": "local", "ipfamily": "ipv4", "disable-snat-multiple-gws": "SnatGW", "second-bridge": "1br", "ic": "ic-disabled"}
- {"target": "node-ip-migration", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "ipv6", "disable-snat-multiple-gws": "SnatGW", "second-bridge": "1br", "ic": "ic-disabled"}
- {"target": "node-ip-migration", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "ipv4", "disable-snat-multiple-gws": "SnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones"}
- {"target": "node-ip-mac-migration", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "ipv6", "disable-snat-multiple-gws": "SnatGW", "second-bridge": "1br", "ic": "ic-disabled"}
- {"target": "node-ip-mac-migration", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "ipv4", "disable-snat-multiple-gws": "SnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones"}
- {"target": "compact-mode", "ha": "noHA", "gateway-mode": "local", "ipfamily": "ipv4", "disable-snat-multiple-gws": "snatGW", "second-bridge": "1br", "ic": "ic-disabled"}
- {"target": "multi-homing", "ha": "noHA", "gateway-mode": "local", "ipfamily": "dualstack", "disable-snat-multiple-gws": "SnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones"}
- {"target": "multi-node-zones", "ha": "noHA", "gateway-mode": "local", "ipfamily": "ipv4", "disable-snat-multiple-gws": "SnatGW", "second-bridge": "1br", "ic": "ic-multi-node-zones", "num-workers": "3", "num-nodes-per-zone": "2"}
Expand Down Expand Up @@ -483,8 +483,8 @@ jobs:
run: |
if [ "${{ matrix.target }}" == "multi-homing" ]; then
make -C test control-plane WHAT="Multi Homing"
elif [ "${{ matrix.target }}" == "node-ip-migration" ]; then
make -C test control-plane WHAT="Node IP address migration"
elif [ "${{ matrix.target }}" == "node-ip-mac-migration" ]; then
make -C test control-plane WHAT="Node IP and MAC address migration"
elif [ "${{ matrix.target }}" == "compact-mode" ]; then
SINGLE_NODE_CLUSTER="true" make -C test shard-network
elif [ "${{ matrix.target }}" == "multi-node-zones" ]; then
Expand Down
12 changes: 4 additions & 8 deletions go-controller/pkg/node/controllers/egressip/egressip.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,9 @@ type Controller struct {
v6 bool
}

func NewController(eIPInformer egressipinformer.EgressIPInformer, nodeInformer cache.SharedIndexInformer, namespaceInformer coreinformers.NamespaceInformer,
podInformer coreinformers.PodInformer, routeManager *routemanager.Controller, v4, v6 bool, nodeName string) (*Controller, error) {
func NewController(eIPInformer egressipinformer.EgressIPInformer, nodeInformer cache.SharedIndexInformer,
namespaceInformer coreinformers.NamespaceInformer, podInformer coreinformers.PodInformer,
routeManager *routemanager.Controller, v4, v6 bool, nodeName string, linkManager *linkmanager.Controller) (*Controller, error) {

c := &Controller{
eIPLister: eIPInformer.Lister(),
Expand All @@ -160,7 +161,7 @@ func NewController(eIPInformer egressipinformer.EgressIPInformer, nodeInformer c
referencedObjectsLock: sync.RWMutex{},
referencedObjects: map[string]*referencedObjects{},
routeManager: routeManager,
linkManager: linkmanager.NewController(nodeName, v4, v6),
linkManager: linkManager,
ruleManager: iprulemanager.NewController(v4, v6),
iptablesManager: iptables.NewController(),
nodeName: nodeName,
Expand Down Expand Up @@ -285,11 +286,6 @@ func (c *Controller) Run(stopCh <-chan struct{}, wg *sync.WaitGroup, threads int
c.namespaceQueue.ShutDown()
}()
wg.Add(1)
go func() {
c.linkManager.Run(stopCh, 2*time.Minute)
wg.Done()
}()
wg.Add(1)
go func() {
c.iptablesManager.Run(stopCh, 6*time.Minute)
wg.Done()
Expand Down
11 changes: 4 additions & 7 deletions go-controller/pkg/node/controllers/egressip/egressip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,8 +204,9 @@ func initController(namespaces []corev1.Namespace, pods []corev1.Pod, egressIPs
if err := watchFactory.Start(); err != nil {
return nil, nil, err
}
linkManager := linkmanager.NewController(node1Name, v4, v6, nil)
c, err := NewController(watchFactory.EgressIPInformer(), watchFactory.NodeInformer(), watchFactory.NamespaceInformer(),
watchFactory.PodCoreInformer(), rm, v4, v6, node1Name)
watchFactory.PodCoreInformer(), rm, v4, v6, node1Name, linkManager)
if err != nil {
return nil, nil, err
}
Expand Down Expand Up @@ -260,12 +261,8 @@ func runController(testNS ns.NetNS, c *Controller) (cleanupFn, error) {
// we do not call start for our controller because the newly created goroutines will not be set to the correct network namespace,
// so we invoke them manually here and call reconcile manually
// normally executed during Run but we call it manually here because run spawns a go routine that we cannot control its netns during test
wg.Add(1)
go testNS.Do(func(netNS ns.NetNS) error {
c.linkManager.Run(stopCh, 10*time.Millisecond)
wg.Done()
return nil
})
c.linkManager.Run(stopCh, wg)

wg.Add(1)
go testNS.Do(func(netNS ns.NetNS) error {
c.iptablesManager.Run(stopCh, 10*time.Millisecond)
Expand Down
13 changes: 12 additions & 1 deletion go-controller/pkg/node/default_node_network_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/controllers/egressservice"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/controllers/upgrade"
nodeipt "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/iptables"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/linkmanager"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/ovspinning"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/routemanager"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/apbroute"
Expand Down Expand Up @@ -699,6 +700,11 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error {
nc.routeManager.Run(nc.stopChan, 4*time.Minute)
}()

// Bootstrap flows in OVS if just normal flow is present
if err := bootstrapOVSFlows(); err != nil {
return fmt.Errorf("failed to bootstrap OVS flows: %w", err)
}

if node, err = nc.Kube.GetNode(nc.name); err != nil {
return fmt.Errorf("error retrieving node %s: %v", nc.name, err)
}
Expand Down Expand Up @@ -1154,10 +1160,13 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error {
}
}

// create link manager, will work for egress IP as well as monitoring MAC changes to default gw bridge
linkManager := linkmanager.NewController(nc.name, config.IPv4Mode, config.IPv6Mode, nc.updateGatewayMAC)

if config.OVNKubernetesFeature.EnableEgressIP && !util.PlatformTypeIsEgressIPCloudProvider() {
c, err := egressip.NewController(nc.watchFactory.EgressIPInformer(), nc.watchFactory.NodeInformer(),
nc.watchFactory.NamespaceInformer(), nc.watchFactory.PodCoreInformer(), nc.routeManager, config.IPv4Mode,
config.IPv6Mode, nc.name)
config.IPv6Mode, nc.name, linkManager)
if err != nil {
return fmt.Errorf("failed to create egress IP controller: %v", err)
}
Expand All @@ -1169,6 +1178,8 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error {
klog.Infof("Egress IP for non-OVN managed networks is disabled")
}

linkManager.Run(nc.stopChan, nc.wg)

nc.wg.Add(1)
go func() {
defer nc.wg.Done()
Expand Down
3 changes: 3 additions & 0 deletions go-controller/pkg/node/egress_service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ var _ = Describe("Egress Service Operations", func() {
app.Flags = config.Flags
fExec = ovntest.NewLooseCompareFakeExec()
fakeOvnNode = NewFakeOVNNode(fExec)
fakeOvnNode.fakeExec.AddFakeCmd(&ovntest.ExpectedCmd{
Cmd: "ovs-vsctl --timeout=15 --no-heading --data=bare --format=csv --columns name list interface",
})

config.OVNKubernetesFeature.EnableEgressService = true
_, cidr4, _ := net.ParseCIDR("10.128.0.0/16")
Expand Down
67 changes: 62 additions & 5 deletions go-controller/pkg/node/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@ import (
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/informer"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types"
util "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"

"github.com/pkg/errors"
"github.com/safchain/ethtool"
kapi "k8s.io/api/core/v1"
discovery "k8s.io/api/discovery/v1"
apierrors "k8s.io/apimachinery/pkg/util/errors"
kerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/klog/v2"
)

Expand All @@ -25,9 +28,11 @@ import (
// are kept in sync
type Gateway interface {
informer.ServiceAndEndpointsEventHandler
Init(factory.NodeWatchFactory, <-chan struct{}, *sync.WaitGroup) error
Init(<-chan struct{}, *sync.WaitGroup) error
Start()
GetGatewayBridgeIface() string
SetDefaultGatewayBridgeMAC(addr net.HardwareAddr)
Reconcile() error
}

type gateway struct {
Expand All @@ -44,6 +49,8 @@ type gateway struct {
initFunc func() error
readyFunc func() (bool, error)

servicesRetryFramework *retry.RetryFramework

watchFactory *factory.WatchFactory // used for retry
stopChan <-chan struct{}
wg *sync.WaitGroup
Expand Down Expand Up @@ -204,16 +211,16 @@ func (g *gateway) DeleteEndpointSlice(epSlice *discovery.EndpointSlice) error {

}

func (g *gateway) Init(wf factory.NodeWatchFactory, stopChan <-chan struct{}, wg *sync.WaitGroup) error {
func (g *gateway) Init(stopChan <-chan struct{}, wg *sync.WaitGroup) error {
g.stopChan = stopChan
g.wg = wg

var err error
if err = g.initFunc(); err != nil {
return err
}
servicesRetryFramework := g.newRetryFrameworkNode(factory.ServiceForGatewayType)
if _, err = servicesRetryFramework.WatchResource(); err != nil {
g.servicesRetryFramework = g.newRetryFrameworkNode(factory.ServiceForGatewayType)
if _, err = g.servicesRetryFramework.WatchResource(); err != nil {
return fmt.Errorf("gateway init failed to start watching services: %v", err)
}

Expand Down Expand Up @@ -356,14 +363,64 @@ func gatewayReady(patchPort string) (bool, error) {
}

func (g *gateway) GetGatewayBridgeIface() string {
return g.openflowManager.defaultBridge.bridgeName
return g.openflowManager.getDefaultBridgeName()
}

// getMaxFrameLength returns the maximum frame size (ignoring VLAN header) that a gateway can handle
func getMaxFrameLength() int {
return config.Default.MTU + 14
}

// SetDefaultGatewayBridgeMAC updates the mac address for the OFM used to render flows with
func (g *gateway) SetDefaultGatewayBridgeMAC(macAddr net.HardwareAddr) {
g.openflowManager.setDefaultBridgeMAC(macAddr)
klog.Infof("Default gateway bridge MAC address updated to %s", macAddr)
}

// Reconcile handles triggering updates to different components of a gateway, like OFM, Services
func (g *gateway) Reconcile() error {
klog.Info("Reconciling gateway with updates")
node, err := g.watchFactory.GetNode(g.nodeIPManager.nodeName)
if err != nil {
return err
}
subnets, err := util.ParseNodeHostSubnetAnnotation(node, types.DefaultNetworkName)
if err != nil {
return fmt.Errorf("failed to get subnets for node: %s for OpenFlow cache update", node.Name)
}
if err := g.openflowManager.updateBridgeFlowCache(subnets, g.nodeIPManager.ListAddresses()); err != nil {
return err
}
// Services create OpenFlow flows as well, need to update them all
if g.servicesRetryFramework != nil {
if errs := g.addAllServices(); errs != nil {
err := kerrors.NewAggregate(errs)
return err
}
}
return nil
}

func (g *gateway) addAllServices() []error {
errs := []error{}
svcs, err := g.watchFactory.GetServices()
if err != nil {
errs = append(errs, err)
} else {
for _, svc := range svcs {
svc := *svc
klog.V(5).Infof("Adding service %s/%s to retryServices", svc.Namespace, svc.Name)
err = g.servicesRetryFramework.AddRetryObjWithAddNoBackoff(&svc)
if err != nil {
err = fmt.Errorf("failed to add service %s/%s to retry framework: %w", svc.Namespace, svc.Name, err)
errs = append(errs, err)
}
}
}
g.servicesRetryFramework.RequestRetryObjs()
return errs
}

type bridgeConfiguration struct {
sync.Mutex
bridgeName string
Expand Down
40 changes: 38 additions & 2 deletions go-controller/pkg/node/gateway_init.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"net"
"strings"

"github.com/vishvananda/netlink"
"k8s.io/klog/v2"
utilnet "k8s.io/utils/net"

Expand Down Expand Up @@ -402,7 +403,7 @@ func (nc *DefaultNodeNetworkController) initGateway(subnets []*net.IPNet, nodeAn
}

initGwFunc := func() error {
return gw.Init(nc.watchFactory, nc.stopChan, nc.wg)
return gw.Init(nc.stopChan, nc.wg)
}

readyGwFunc := func() (bool, error) {
Expand Down Expand Up @@ -501,7 +502,7 @@ func (nc *DefaultNodeNetworkController) initGatewayDPUHost(kubeNodeIP net.IP) er
return fmt.Errorf("failed to add MAC bindings for service routing")
}

err = gw.Init(nc.watchFactory, nc.stopChan, nc.wg)
err = gw.Init(nc.stopChan, nc.wg)
nc.gateway = gw
return err
}
Expand Down Expand Up @@ -534,3 +535,38 @@ func CleanupClusterNode(name string) error {

return nil
}

func (nc *DefaultNodeNetworkController) updateGatewayMAC(link netlink.Link) error {
if nc.gateway.GetGatewayBridgeIface() != link.Attrs().Name {
return nil
}

node, err := nc.watchFactory.GetNode(nc.name)
if err != nil {
return err
}
l3gwConf, err := util.ParseNodeL3GatewayAnnotation(node)
if err != nil {
return err
}

if l3gwConf == nil || l3gwConf.MACAddress.String() == link.Attrs().HardwareAddr.String() {
return nil
}
// MAC must have changed, update node
nc.gateway.SetDefaultGatewayBridgeMAC(link.Attrs().HardwareAddr)
if err := nc.gateway.Reconcile(); err != nil {
return fmt.Errorf("failed to reconcile gateway for MAC address update: %w", err)
}
nodeAnnotator := kube.NewNodeAnnotator(nc.Kube, node.Name)
l3gwConf.MACAddress = link.Attrs().HardwareAddr
if err := util.SetL3GatewayConfig(nodeAnnotator, l3gwConf); err != nil {
return fmt.Errorf("failed to update L3 gateway config annotation for node: %s, error: %w", node.Name, err)
}
if err := nodeAnnotator.Run(); err != nil {
return fmt.Errorf("failed to set node %s annotations: %w", nc.name, err)
}

return nil

}

0 comments on commit 0bfb6bd

Please sign in to comment.