Skip to content

Commit

Permalink
Merge pull request #1187 from tssurya/bug-2105655
Browse files Browse the repository at this point in the history
[release-4.8] Bug 2105655: egressIP: node retrieval failure is not respected, causes panic
  • Loading branch information
openshift-merge-robot committed Jul 25, 2022
2 parents 5ece246 + 0c7f999 commit fad3aa9
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 26 deletions.
36 changes: 16 additions & 20 deletions go-controller/pkg/ovn/egressip.go
Expand Up @@ -678,22 +678,22 @@ func (oc *Controller) setNodeEgressReachable(nodeName string, isReachable bool)
}
}

func (oc *Controller) addEgressNode(egressNode *kapi.Node) error {
klog.V(5).Infof("Egress node: %s about to be initialized", egressNode.Name)
func (oc *Controller) addEgressNode(nodeName string) error {
klog.V(5).Infof("Egress node: %s about to be initialized", nodeName)
if stdout, stderr, err := util.RunOVNNbctl(
"set",
"logical_switch_port",
types.EXTSwitchToGWRouterPrefix+types.GWRouterPrefix+egressNode.Name,
types.EXTSwitchToGWRouterPrefix+types.GWRouterPrefix+nodeName,
"options:nat-addresses=router",
"options:exclude-lb-vips-from-garp=true",
); err != nil {
klog.Errorf("Unable to configure GARP on external logical switch port for egress node: %s, "+
"this will result in packet drops during egress IP re-assignment, stdout: %s, stderr: %s, err: %v", egressNode.Name, stdout, stderr, err)
"this will result in packet drops during egress IP re-assignment, stdout: %s, stderr: %s, err: %v", nodeName, stdout, stderr, err)
}
oc.eIPC.assignmentRetryMutex.Lock()
defer oc.eIPC.assignmentRetryMutex.Unlock()
for eIPName := range oc.eIPC.assignmentRetry {
klog.V(5).Infof("Re-assignment for EgressIP: %s attempted by new node: %s", eIPName, egressNode.Name)
klog.V(5).Infof("Re-assignment for EgressIP: %s attempted by new node: %s", eIPName, nodeName)
eIP, err := oc.kube.GetEgressIP(eIPName)
if errors.IsNotFound(err) {
klog.Errorf("Re-assignment for EgressIP: EgressIP: %s not found in the api-server, err: %v", eIPName, err)
Expand All @@ -716,17 +716,17 @@ func (oc *Controller) addEgressNode(egressNode *kapi.Node) error {
return nil
}

func (oc *Controller) deleteEgressNode(egressNode *kapi.Node) error {
klog.V(5).Infof("Egress node: %s about to be removed", egressNode.Name)
func (oc *Controller) deleteEgressNode(nodeName string) error {
klog.V(5).Infof("Egress node: %s about to be removed", nodeName)
if stdout, stderr, err := util.RunOVNNbctl(
"remove",
"logical_switch_port",
types.EXTSwitchToGWRouterPrefix+types.GWRouterPrefix+egressNode.Name,
types.EXTSwitchToGWRouterPrefix+types.GWRouterPrefix+nodeName,
"options",
"nat-addresses=router",
"exclude-lb-vips-from-garp=true",
); err != nil {
klog.Errorf("Unable to remove GARP configuration on external logical switch port for egress node: %s, stdout: %s, stderr: %s, err: %v", egressNode.Name, stdout, stderr, err)
klog.Errorf("Unable to remove GARP configuration on external logical switch port for egress node: %s, stdout: %s, stderr: %s, err: %v", nodeName, stdout, stderr, err)
}

oc.eIPC.assignmentRetryMutex.Lock()
Expand All @@ -738,7 +738,7 @@ func (oc *Controller) deleteEgressNode(egressNode *kapi.Node) error {
for _, eIP := range egressIPs.Items {
needsReassignment := false
for _, status := range eIP.Status.Items {
if status.Node == egressNode.Name {
if status.Node == nodeName {
needsReassignment = true
break
}
Expand Down Expand Up @@ -1180,19 +1180,15 @@ func (oc *Controller) checkEgressNodesReachability() {
}
oc.eIPC.allocatorMutex.Unlock()
for nodeName, shouldDelete := range reAddOrDelete {
node, err := oc.kube.GetNode(nodeName)
if err != nil {
klog.Errorf("Node: %s reachability changed, but could not retrieve node from API server, err: %v", node.Name, err)
}
if shouldDelete {
klog.Warningf("Node: %s is detected as unreachable, deleting it from egress assignment", node.Name)
if err := oc.deleteEgressNode(node); err != nil {
klog.Errorf("Node: %s is detected as unreachable, but could not re-assign egress IPs, err: %v", node.Name, err)
klog.Warningf("Node: %s is detected as unreachable, deleting it from egress assignment", nodeName)
if err := oc.deleteEgressNode(nodeName); err != nil {
klog.Errorf("Node: %s is detected as unreachable, but could not re-assign egress IPs, err: %v", nodeName, err)
}
} else {
klog.Infof("Node: %s is detected as reachable and ready again, adding it to egress assignment", node.Name)
if err := oc.addEgressNode(node); err != nil {
klog.Errorf("Node: %s is detected as reachable and ready again, but could not re-assign egress IPs, err: %v", node.Name, err)
klog.Infof("Node: %s is detected as reachable and ready again, adding it to egress assignment", nodeName)
if err := oc.addEgressNode(nodeName); err != nil {
klog.Errorf("Node: %s is detected as reachable and ready again, but could not re-assign egress IPs, err: %v", nodeName, err)
}
}
}
Expand Down
12 changes: 6 additions & 6 deletions go-controller/pkg/ovn/ovn.go
Expand Up @@ -770,7 +770,7 @@ func (oc *Controller) WatchEgressNodes() {
oc.setNodeEgressReachable(node.Name, true)
}
if hasEgressLabel && isReachable && isReady {
if err := oc.addEgressNode(node); err != nil {
if err := oc.addEgressNode(node.Name); err != nil {
klog.Error(err)
}
}
Expand All @@ -791,7 +791,7 @@ func (oc *Controller) WatchEgressNodes() {
if oldHadEgressLabel && !newHasEgressLabel {
klog.Infof("Node: %s has been un-labelled, deleting it from egress assignment", newNode.Name)
oc.setNodeEgressAssignable(oldNode.Name, false)
if err := oc.deleteEgressNode(oldNode); err != nil {
if err := oc.deleteEgressNode(oldNode.Name); err != nil {
klog.Error(err)
}
return
Expand All @@ -805,7 +805,7 @@ func (oc *Controller) WatchEgressNodes() {
klog.Infof("Node: %s has been labelled, adding it for egress assignment", newNode.Name)
oc.setNodeEgressAssignable(newNode.Name, true)
if isNewReady && isNewReachable {
if err := oc.addEgressNode(newNode); err != nil {
if err := oc.addEgressNode(newNode.Name); err != nil {
klog.Error(err)
}
} else {
Expand All @@ -818,12 +818,12 @@ func (oc *Controller) WatchEgressNodes() {
}
if !isNewReady {
klog.Warningf("Node: %s is not ready, deleting it from egress assignment", newNode.Name)
if err := oc.deleteEgressNode(newNode); err != nil {
if err := oc.deleteEgressNode(newNode.Name); err != nil {
klog.Error(err)
}
} else if isNewReady && isNewReachable {
klog.Infof("Node: %s is ready and reachable, adding it for egress assignment", newNode.Name)
if err := oc.addEgressNode(newNode); err != nil {
if err := oc.addEgressNode(newNode.Name); err != nil {
klog.Error(err)
}
}
Expand All @@ -835,7 +835,7 @@ func (oc *Controller) WatchEgressNodes() {
}
nodeLabels := node.GetLabels()
if _, hasEgressLabel := nodeLabels[nodeEgressLabel]; hasEgressLabel {
if err := oc.deleteEgressNode(node); err != nil {
if err := oc.deleteEgressNode(node.Name); err != nil {
klog.Error(err)
}
}
Expand Down

0 comments on commit fad3aa9

Please sign in to comment.