Commit 9f1ac2c

Merge pull request #2090 from pperiyasamy/skip-eip-for-vip
OCPBUGS-29851: Egress IP, Services: use all node IP addresses
openshift-merge-bot[bot] committed Mar 8, 2024
2 parents 622b6ab + 6d05a59 commit 9f1ac2c
Showing 11 changed files with 486 additions and 280 deletions.
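The PR's core idea, per the title, is that egress-IP and egress-service no-reroute handling should consider every IP address a node reports, not only its primary InternalIP, so traffic destined to secondary or virtual IPs is not mistakenly rerouted. A minimal sketch of that idea follows; the helper name `allNodeIPs` and the use of status addresses are illustrative assumptions, not this PR's code (the controller derives addresses from annotations maintained by ovnkube-node):

```go
// Hypothetical illustration: collect every IP address reported for a node
// instead of only its InternalIP, so no-reroute policies cover VIPs and
// secondary addresses too.
package main

import (
	"fmt"
	"net"

	corev1 "k8s.io/api/core/v1"
)

// allNodeIPs returns all parseable IPs from a node's status addresses.
func allNodeIPs(node *corev1.Node) []net.IP {
	var ips []net.IP
	for _, addr := range node.Status.Addresses {
		if addr.Type != corev1.NodeInternalIP && addr.Type != corev1.NodeExternalIP {
			continue
		}
		if ip := net.ParseIP(addr.Address); ip != nil {
			ips = append(ips, ip)
		}
	}
	return ips
}

func main() {
	node := &corev1.Node{Status: corev1.NodeStatus{Addresses: []corev1.NodeAddress{
		{Type: corev1.NodeInternalIP, Address: "192.168.1.10"},
		{Type: corev1.NodeExternalIP, Address: "203.0.113.7"},
	}}}
	fmt.Println(allNodeIPs(node)) // both addresses are candidates for no-reroute
}
```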
@@ -40,10 +40,8 @@ const (

type InitClusterEgressPoliciesFunc func(client libovsdbclient.Client, addressSetFactory addressset.AddressSetFactory,
controllerName string) error
type CreateNoRerouteNodePoliciesFunc func(client libovsdbclient.Client, addressSetFactory addressset.AddressSetFactory,
node *corev1.Node, controllerName string) error
type DeleteNoRerouteNodePoliciesFunc func(addressSetFactory addressset.AddressSetFactory, nodeName string,
v4NodeAddr, v6NodeAddr net.IP, controllerName string) error
type EnsureNoRerouteNodePoliciesFunc func(client libovsdbclient.Client, addressSetFactory addressset.AddressSetFactory,
controllerName string, nodeLister corelisters.NodeLister) error
type DeleteLegacyDefaultNoRerouteNodePoliciesFunc func(libovsdbclient.Client, string) error

type Controller struct {
@@ -54,8 +52,7 @@ type Controller struct {
sync.Mutex

initClusterEgressPolicies InitClusterEgressPoliciesFunc
createNoRerouteNodePolicies CreateNoRerouteNodePoliciesFunc
deleteNoRerouteNodePolicies DeleteNoRerouteNodePoliciesFunc
ensureNoRerouteNodePolicies EnsureNoRerouteNodePoliciesFunc
deleteLegacyDefaultNoRerouteNodePolicies DeleteLegacyDefaultNoRerouteNodePoliciesFunc
IsReachable func(nodeName string, mgmtIPs []net.IP, healthClient healthcheck.EgressIPHealthClient) bool // TODO: make a universal cache instead

@@ -91,17 +88,15 @@ type svcState struct {
}

type nodeState struct {
name string
labels map[string]string
mgmtIPs []net.IP
v4MgmtIP net.IP
v6MgmtIP net.IP
v4InternalNodeIP net.IP
v6InternalNodeIP net.IP
healthClient healthcheck.EgressIPHealthClient
allocations map[string]*svcState // svc key -> state
reachable bool
draining bool
name string
labels map[string]string
mgmtIPs []net.IP
v4MgmtIP net.IP
v6MgmtIP net.IP
healthClient healthcheck.EgressIPHealthClient
allocations map[string]*svcState // svc key -> state
reachable bool
draining bool
}

func NewController(
@@ -110,8 +105,7 @@
nbClient libovsdbclient.Client,
addressSetFactory addressset.AddressSetFactory,
initClusterEgressPolicies InitClusterEgressPoliciesFunc,
createNoRerouteNodePolicies CreateNoRerouteNodePoliciesFunc,
deleteNoRerouteNodePolicies DeleteNoRerouteNodePoliciesFunc,
ensureNoRerouteNodePolicies EnsureNoRerouteNodePoliciesFunc,
deleteLegacyDefaultNoRerouteNodePolicies DeleteLegacyDefaultNoRerouteNodePoliciesFunc,
isReachable func(nodeName string, mgmtIPs []net.IP, healthClient healthcheck.EgressIPHealthClient) bool,
stopCh <-chan struct{},
@@ -125,8 +119,7 @@
nbClient: nbClient,
addressSetFactory: addressSetFactory,
initClusterEgressPolicies: initClusterEgressPolicies,
createNoRerouteNodePolicies: createNoRerouteNodePolicies,
deleteNoRerouteNodePolicies: deleteNoRerouteNodePolicies,
ensureNoRerouteNodePolicies: ensureNoRerouteNodePolicies,
deleteLegacyDefaultNoRerouteNodePolicies: deleteLegacyDefaultNoRerouteNodePolicies,
IsReachable: isReachable,
stopCh: stopCh,
@@ -102,9 +102,11 @@ func (c *Controller) onNodeUpdate(oldObj, newObj interface{}) {
oldNodeReady := nodeIsReady(oldNode)
newNodeReady := nodeIsReady(newNode)

// We only care about node updates that relate to readiness or label changes
// We only care about node updates that relate to readiness, labels or
// addresses
if labels.Equals(oldNodeLabels, newNodeLabels) &&
oldNodeReady == newNodeReady {
oldNodeReady == newNodeReady &&
!util.NodeHostAddressesAnnotationChanged(oldNode, newNode) {
return
}
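`util.NodeHostAddressesAnnotationChanged` is what lets this handler react to node address changes in addition to readiness and label changes. The sketch below shows the kind of predicate it implies; the annotation key and the plain string comparison are assumptions for illustration (the real helper may parse the value and ignore ordering):

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Assumed annotation key; ovnkube-node maintains the node's host addresses in
// an annotation, but the exact key and format are owned by the util package.
const hostAddressesAnnotation = "k8s.ovn.org/host-addresses"

// hostAddressesChanged is a naive stand-in: it flags any textual change to
// the annotation value between the old and new node objects.
func hostAddressesChanged(oldNode, newNode *corev1.Node) bool {
	return oldNode.Annotations[hostAddressesAnnotation] != newNode.Annotations[hostAddressesAnnotation]
}

func main() {
	oldNode := &corev1.Node{ObjectMeta: metav1.ObjectMeta{
		Annotations: map[string]string{hostAddressesAnnotation: `["10.0.0.5"]`}}}
	newNode := &corev1.Node{ObjectMeta: metav1.ObjectMeta{
		Annotations: map[string]string{hostAddressesAnnotation: `["10.0.0.5","203.0.113.7"]`}}}
	fmt.Println(hostAddressesChanged(oldNode, newNode)) // true -> node gets resynced
}
```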

@@ -175,6 +177,13 @@ func (c *Controller) syncNode(key string) error {
return err
}

// We ensure the node no-reroute policies here to account for possible node
// IP address changes, regardless of allocated services.
err = c.ensureNoRerouteNodePolicies(c.nbClient, c.addressSetFactory, c.controllerName, c.nodeLister)
if err != nil {
return err
}

n, err := c.nodeLister.Get(nodeName)
if err != nil && !apierrors.IsNotFound(err) {
return err
@@ -196,21 +205,10 @@
}
delete(c.nodes, nodeName)
state.healthClient.Disconnect()
} else {
// we don't have a node at this point (node deleted?) and we don't have its cache
// entry (state==nil) as well. Maybe state was deleted when node became nodeReady or unreachable
// nothing to sync here
return nil
}

return c.deleteNoRerouteNodePolicies(c.addressSetFactory, nodeName, state.v4InternalNodeIP,
state.v6InternalNodeIP, c.controllerName)
}

// We create the per-node reroute policies as long as it has a resource (n != nil at this point),
// regardless if it was allocated services or not.
if err := c.createNoRerouteNodePolicies(c.nbClient, c.addressSetFactory, n, c.controllerName); err != nil {
return err
// nothing to sync here
return nil
}

nodeReady := nodeIsReady(n)
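The switch from paired `createNoRerouteNodePolicies`/`deleteNoRerouteNodePolicies` calls to a single `ensureNoRerouteNodePolicies` call (which takes a node lister) is a move to an idempotent, recompute-everything sync: every pass rebuilds the desired address list from the nodes that currently exist, so there is no separate delete path to keep in step. A stdlib-only sketch of that pattern; the names and the `apply` callback are illustrative, not the controller's API:

```go
package main

import (
	"fmt"
	"net"
	"sort"
)

// ensureNoRerouteAddresses recomputes the desired address list from whatever
// nodes currently exist and applies it in one shot; callers never need a
// matching "delete" for nodes that went away or changed IPs.
func ensureNoRerouteAddresses(nodeIPs map[string][]net.IP, apply func([]string) error) error {
	var desired []string
	for _, ips := range nodeIPs {
		for _, ip := range ips {
			desired = append(desired, ip.String())
		}
	}
	sort.Strings(desired) // deterministic ordering for stable updates
	return apply(desired)
}

func main() {
	nodes := map[string][]net.IP{
		"node-a": {net.ParseIP("10.0.0.5"), net.ParseIP("203.0.113.7")},
		"node-b": {net.ParseIP("10.0.0.6")},
	}
	_ = ensureNoRerouteAddresses(nodes, func(addrs []string) error {
		fmt.Println("set address-set to:", addrs) // e.g. one OVN address-set update
		return nil
	})
}
```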
@@ -316,9 +314,7 @@ func (c *Controller) nodeStateFor(name string) (*nodeState, error) {
v6IP = ip
}

v4NodeAddr, v6NodeAddr := util.GetNodeInternalAddrs(node)

return &nodeState{name: name, mgmtIPs: mgmtIPs, v4MgmtIP: v4IP, v6MgmtIP: v6IP, v4InternalNodeIP: v4NodeAddr, v6InternalNodeIP: v6NodeAddr,
return &nodeState{name: name, mgmtIPs: mgmtIPs, v4MgmtIP: v4IP, v6MgmtIP: v6IP,
healthClient: healthcheck.NewEgressIPHealthClient(name), allocations: map[string]*svcState{}, labels: node.Labels,
reachable: true, draining: false}, nil
}
go-controller/pkg/ovn/default_network_controller.go (110 changes: 4 additions & 106 deletions)
@@ -132,8 +132,6 @@ type DefaultNetworkController struct {
retryEgressNodes *retry.RetryFramework
// retry framework for Egress Firewall Nodes
retryEgressFwNodes *retry.RetryFramework
// EgressIP Node-specific syncMap used by egressip node event handler
addEgressNodeFailed sync.Map

// Node-specific syncMaps used by node event handler
gatewaysFailed sync.Map
@@ -200,6 +198,7 @@ func newDefaultNetworkControllerCommon(cnci *CommonNetworkControllerInfo,
eIPC: egressIPController{
egressIPAssignmentMutex: &sync.Mutex{},
podAssignmentMutex: &sync.Mutex{},
nodeIPUpdateMutex: &sync.Mutex{},
podAssignment: make(map[string]*podAssignmentState),
pendingCloudPrivateIPConfigsMutex: &sync.Mutex{},
pendingCloudPrivateIPConfigsOps: make(map[string]map[string]*cloudPrivateIPConfigOp),
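The new `nodeIPUpdateMutex` suggests that updates driven by node IP changes are serialized so they do not race with other egress-IP bookkeeping. A minimal illustration of that guard; the type and field names below are invented for the sketch:

```go
package main

import (
	"fmt"
	"sync"
)

type egressIPController struct {
	nodeIPUpdateMutex *sync.Mutex
	nodeIPs           map[string][]string
}

// updateNodeIPs applies one node-IP-driven update at a time so concurrent
// node events cannot interleave while shared state is being rewritten.
func (c *egressIPController) updateNodeIPs(node string, ips []string) {
	c.nodeIPUpdateMutex.Lock()
	defer c.nodeIPUpdateMutex.Unlock()
	c.nodeIPs[node] = ips
}

func main() {
	c := &egressIPController{nodeIPUpdateMutex: &sync.Mutex{}, nodeIPs: map[string][]string{}}
	c.updateNodeIPs("node-a", []string{"10.0.0.5", "203.0.113.7"})
	fmt.Println(c.nodeIPs)
}
```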
@@ -786,26 +785,7 @@ func (h *defaultNetworkControllerEventHandler) AddResource(obj interface{}, from

case factory.EgressNodeType:
node := obj.(*kapi.Node)
if err := h.oc.setupNodeForEgress(node); err != nil {
return err
}
nodeEgressLabel := util.GetNodeEgressLabel()
nodeLabels := node.GetLabels()
_, hasEgressLabel := nodeLabels[nodeEgressLabel]
if hasEgressLabel {
h.oc.setNodeEgressAssignable(node.Name, true)
}
isReady := h.oc.isEgressNodeReady(node)
if isReady {
h.oc.setNodeEgressReady(node.Name, true)
}
isReachable := h.oc.isEgressNodeReachable(node)
if hasEgressLabel && isReachable && isReady {
h.oc.setNodeEgressReachable(node.Name, true)
if err := h.oc.addEgressNode(node.Name); err != nil {
return err
}
}
return h.oc.reconcileNodeForEgressIP(nil, node)

case factory.EgressFwNodeType:
node := obj.(*kapi.Node)
@@ -896,79 +876,7 @@ func (h *defaultNetworkControllerEventHandler) UpdateResource(oldObj, newObj int
case factory.EgressNodeType:
oldNode := oldObj.(*kapi.Node)
newNode := newObj.(*kapi.Node)

// Check if the node's internal addresses changed. If so,
// delete and readd the node for egress to update LR policies.
// We are only interested in the IPs here, not the subnet information.
oldV4Addr, oldV6Addr := util.GetNodeInternalAddrs(oldNode)
newV4Addr, newV6Addr := util.GetNodeInternalAddrs(newNode)
if !oldV4Addr.Equal(newV4Addr) || !oldV6Addr.Equal(newV6Addr) {
klog.Infof("Egress IP detected IP address change. Recreating node %s for Egress IP.", newNode.Name)
if err := h.oc.deleteNodeForEgress(oldNode); err != nil {
klog.Error(err)
}
if err := h.oc.setupNodeForEgress(newNode); err != nil {
klog.Error(err)
}
}

// Initialize the allocator on every update,
// ovnkube-node/cloud-network-config-controller will make sure to
// annotate the node with the egressIPConfig, but that might have
// happened after we processed the ADD for that object, hence keep
// retrying for all UPDATEs.
if err := h.oc.initEgressIPAllocator(newNode); err != nil {
klog.Warningf("Egress node initialization error: %v", err)
}
nodeEgressLabel := util.GetNodeEgressLabel()
oldLabels := oldNode.GetLabels()
newLabels := newNode.GetLabels()
_, oldHadEgressLabel := oldLabels[nodeEgressLabel]
_, newHasEgressLabel := newLabels[nodeEgressLabel]
// If the node is not labeled for egress assignment, just return
// directly, we don't really need to set the ready / reachable
// status on this node if the user doesn't care about using it.
if !oldHadEgressLabel && !newHasEgressLabel {
return nil
}
h.oc.setNodeEgressAssignable(newNode.Name, newHasEgressLabel)
if oldHadEgressLabel && !newHasEgressLabel {
klog.Infof("Node: %s has been un-labeled, deleting it from egress assignment", newNode.Name)
return h.oc.deleteEgressNode(oldNode.Name)
}
isOldReady := h.oc.isEgressNodeReady(oldNode)
isNewReady := h.oc.isEgressNodeReady(newNode)
isNewReachable := h.oc.isEgressNodeReachable(newNode)
h.oc.setNodeEgressReady(newNode.Name, isNewReady)
if !oldHadEgressLabel && newHasEgressLabel {
klog.Infof("Node: %s has been labeled, adding it for egress assignment", newNode.Name)
if isNewReady && isNewReachable {
h.oc.setNodeEgressReachable(newNode.Name, isNewReachable)
if err := h.oc.addEgressNode(newNode.Name); err != nil {
return err
}
} else {
klog.Warningf("Node: %s has been labeled, but node is not ready"+
" and reachable, cannot use it for egress assignment", newNode.Name)
}
return nil
}
if isOldReady == isNewReady {
return nil
}
if !isNewReady {
klog.Warningf("Node: %s is not ready, deleting it from egress assignment", newNode.Name)
if err := h.oc.deleteEgressNode(newNode.Name); err != nil {
return err
}
} else if isNewReady && isNewReachable {
klog.Infof("Node: %s is ready and reachable, adding it for egress assignment", newNode.Name)
h.oc.setNodeEgressReachable(newNode.Name, isNewReachable)
if err := h.oc.addEgressNode(newNode.Name); err != nil {
return err
}
}
return nil
return h.oc.reconcileNodeForEgressIP(oldNode, newNode)

case factory.EgressFwNodeType:
oldNode := oldObj.(*kapi.Node)
@@ -1057,17 +965,7 @@ func (h *defaultNetworkControllerEventHandler) DeleteResource(obj, cachedObj int

case factory.EgressNodeType:
node := obj.(*kapi.Node)
if err := h.oc.deleteNodeForEgress(node); err != nil {
return err
}
nodeEgressLabel := util.GetNodeEgressLabel()
nodeLabels := node.GetLabels()
if _, hasEgressLabel := nodeLabels[nodeEgressLabel]; hasEgressLabel {
if err := h.oc.deleteEgressNode(node.Name); err != nil {
return err
}
}
return nil
return h.oc.reconcileNodeForEgressIP(node, nil)

case factory.EgressFwNodeType:
node, ok := obj.(*kapi.Node)
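All three handler paths above now funnel into `h.oc.reconcileNodeForEgressIP`, with `nil` standing in for the missing side on add and delete. A sketch of that reconcile shape, using illustrative names rather than the controller's real signature:

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// reconcileNode converges egress state from whatever old/new pair it is
// given; nil means "did not exist before" (add) or "no longer exists" (delete).
func reconcileNode(oldNode, newNode *corev1.Node) error {
	switch {
	case oldNode == nil && newNode != nil:
		fmt.Println("add:", newNode.Name) // set up egress handling for a new node
	case oldNode != nil && newNode == nil:
		fmt.Println("delete:", oldNode.Name) // tear down egress handling
	default:
		fmt.Println("update:", newNode.Name) // diff labels/readiness/addresses and converge
	}
	return nil
}

func main() {
	n := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-a"}}
	_ = reconcileNode(nil, n) // AddResource path
	_ = reconcileNode(n, n)   // UpdateResource path
	_ = reconcileNode(n, nil) // DeleteResource path
}
```

Collapsing the add/update/delete logic into one reconcile call is what lets the diff drop the label, readiness, and reachability bookkeeping that previously lived in each handler branch.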
