Skip to content

Commit

Permalink
Fix remove remote pod when no IPs annotated
Browse files Browse the repository at this point in the history
This was recently changed incorrectly. A pod can be deleted with no IPs
annotated so don't assume that is an error.

Incidentally fix the case for a live migrated pod as well which would
have failed in the same way before.

Fixes: #3919

Signed-off-by: Jaime Caamaño Ruiz <jcaamano@redhat.com>
  • Loading branch information
jcaamano committed Oct 24, 2023
1 parent 099db07 commit ec226ba
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 13 deletions.
1 change: 1 addition & 0 deletions contrib/metallb
Submodule metallb added at f5ba91
8 changes: 4 additions & 4 deletions go-controller/pkg/kubevirt/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,8 @@ func IsMigratedSourcePodStale(client *factory.WatchFactory, pod *corev1.Pod) (bo
// ZoneContainsPodSubnet will return true if the logical switch tonains
// the pod subnet and also the switch name owning it, this means that
// this zone owns the that subnet.
func ZoneContainsPodSubnet(lsManager *logicalswitchmanager.LogicalSwitchManager, podAnnotation *util.PodAnnotation) (string, bool) {
return lsManager.GetSubnetName(podAnnotation.IPs)
func ZoneContainsPodSubnet(lsManager *logicalswitchmanager.LogicalSwitchManager, ips []*net.IPNet) (string, bool) {
return lsManager.GetSubnetName(ips)
}

// nodeContainsPodSubnet will return true if the node subnet annotation
Expand Down Expand Up @@ -267,7 +267,7 @@ func allocateSyncMigratablePodIPs(watchFactory *factory.WatchFactory, lsManager
if err != nil {
return nil, "", nil, nil
}
switchName, zoneContainsPodSubnet := ZoneContainsPodSubnet(lsManager, annotation)
switchName, zoneContainsPodSubnet := ZoneContainsPodSubnet(lsManager, annotation.IPs)
// If this zone do not own the subnet or the node that is passed
// do not match the switch, they should not be deallocated
if !zoneContainsPodSubnet || (nodeName != "" && switchName != nodeName) {
Expand Down Expand Up @@ -295,7 +295,7 @@ func AllocateSyncMigratablePodIPsOnZone(watchFactory *factory.WatchFactory, lsMa
// convenience, the host subnets might not provided in which case they might be
// parsed and returned if used.
func ZoneContainsPodSubnetOrUntracked(watchFactory *factory.WatchFactory, lsManager *logicalswitchmanager.LogicalSwitchManager, hostSubnets []*net.IPNet, annotation *util.PodAnnotation) ([]*net.IPNet, bool, error) {
_, local := ZoneContainsPodSubnet(lsManager, annotation)
_, local := ZoneContainsPodSubnet(lsManager, annotation.IPs)
if local {
return nil, true, nil
}
Expand Down
2 changes: 1 addition & 1 deletion go-controller/pkg/kubevirt/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ func EnsureLocalZonePodAddressesToNodeRoute(watchFactory *factory.WatchFactory,
return fmt.Errorf("failed reading local pod annotation: %v", err)
}

nodeOwningSubnet, _ := ZoneContainsPodSubnet(lsManager, podAnnotation)
nodeOwningSubnet, _ := ZoneContainsPodSubnet(lsManager, podAnnotation.IPs)
vmRunningAtNodeOwningSubnet := nodeOwningSubnet == pod.Spec.NodeName
if vmRunningAtNodeOwningSubnet {
// Point to point routing is no longer needed if vm
Expand Down
2 changes: 1 addition & 1 deletion go-controller/pkg/ovn/base_network_controller_pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ func (bnc *BaseNetworkController) ensurePodAnnotation(pod *kapi.Pod, nadName str
}
// The live migrated pods will have the pod annotation but the switch manager running
// at the node will not contain the switch as expected
_, zoneContainsPodSubnet := kubevirt.ZoneContainsPodSubnet(bnc.lsManager, podAnnotation)
_, zoneContainsPodSubnet := kubevirt.ZoneContainsPodSubnet(bnc.lsManager, podAnnotation.IPs)
return podAnnotation, zoneContainsPodSubnet, nil
}
podAnnotation, err := util.UnmarshalPodAnnotation(pod.Annotations, nadName)
Expand Down
14 changes: 7 additions & 7 deletions go-controller/pkg/ovn/ovn.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package ovn

import (
"bytes"
"errors"
"fmt"
"net"
"reflect"
Expand Down Expand Up @@ -279,11 +280,6 @@ func (oc *DefaultNetworkController) removeRemoteZonePod(pod *kapi.Pod) error {
}
}

podAnnotation, err := util.UnmarshalPodAnnotation(pod.Annotations, ovntypes.DefaultNetworkName)
if err != nil {
return fmt.Errorf("unable to unmarshal pod annotation to release IPs at removeRemoteZonePod: %v", err)
}

// while this check is only intended for local pods, we also need it for
// remote live migrated pods that might have been allocated from this zone
if oc.wasPodReleasedBeforeStartup(string(pod.UID), ovntypes.DefaultNetworkName) {
Expand All @@ -299,9 +295,13 @@ func (oc *DefaultNetworkController) removeRemoteZonePod(pod *kapi.Pod) error {
}

if kubevirt.IsPodLiveMigratable(pod) {
switchName, zoneContainsPodSubnet := kubevirt.ZoneContainsPodSubnet(oc.lsManager, podAnnotation)
ips, err := util.GetPodCIDRsWithFullMask(pod, oc.NetInfo)
if err != nil && !errors.Is(err, util.ErrNoPodIPFound) {
return fmt.Errorf("failed to get pod ips for the pod %s/%s : %w", pod.Namespace, pod.Name, err)
}
switchName, zoneContainsPodSubnet := kubevirt.ZoneContainsPodSubnet(oc.lsManager, ips)
if zoneContainsPodSubnet {
if err := oc.lsManager.ReleaseIPs(switchName, podAnnotation.IPs); err != nil {
if err := oc.lsManager.ReleaseIPs(switchName, ips); err != nil {
return err
}
}
Expand Down
57 changes: 57 additions & 0 deletions go-controller/pkg/ovn/pods_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"net"
"strings"
"sync"
"time"

"github.com/urfave/cli/v2"
Expand Down Expand Up @@ -2378,5 +2379,61 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() {
err := app.Run([]string{app.Name})
gomega.Expect(err).NotTo(gomega.HaveOccurred())
})

ginkgo.It("should handle a completed remote pod with no IPs annotated", func() {
app.Action = func(ctx *cli.Context) error {
namespaceT := *newNamespace("namespace1")
t := newTPod(
"node1",
"10.128.1.0/24",
"10.128.1.2",
"10.128.1.1",
"myPod",
"10.128.1.3",
"0a:58:0a:80:01:03",
namespaceT.Name,
)
myPod := newPod(t.namespace, t.podName, t.nodeName, t.podIP)
myPod.Status.Phase = v1.PodSucceeded

fakeOvn.startWithDBSetup(initialDB,
&v1.NamespaceList{
Items: []v1.Namespace{
namespaceT,
},
},
&v1.NodeList{
Items: []v1.Node{
*newNode(node1Name, "192.168.126.202/24"),
},
},
&v1.PodList{
Items: []v1.Pod{*myPod},
},
)

// initialize the localZoneNodes empty so the controller thinks
// the node is on a remote zone
fakeOvn.controller.localZoneNodes = &sync.Map{}

err := fakeOvn.controller.WatchNamespaces()
gomega.Expect(err).NotTo(gomega.HaveOccurred())
err = fakeOvn.controller.WatchPods()
gomega.Expect(err).NotTo(gomega.HaveOccurred())

// check that the namespace AS is kept empty
fakeOvn.asf.ExpectAddressSetWithIPs(namespaceT.Name, []string{})

// check that that the pod is not being retried, it should have
// been handled synchronously and succesfully in WatchPods
podKey, err := retry.GetResourceKey(myPod)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
gomega.Expect(retry.CheckRetryObj(podKey, fakeOvn.controller.retryPods)).To(gomega.BeFalse())
return nil
}

err := app.Run([]string{app.Name})
gomega.Expect(err).NotTo(gomega.HaveOccurred())
})
})
})

0 comments on commit ec226ba

Please sign in to comment.