Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix remove remote pod when no IPs annotated #3976

Merged
merged 1 commit into from
Oct 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 4 additions & 4 deletions go-controller/pkg/kubevirt/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,8 @@ func IsMigratedSourcePodStale(client *factory.WatchFactory, pod *corev1.Pod) (bo
// ZoneContainsPodSubnet will return true if the logical switch tonains
// the pod subnet and also the switch name owning it, this means that
// this zone owns the that subnet.
func ZoneContainsPodSubnet(lsManager *logicalswitchmanager.LogicalSwitchManager, podAnnotation *util.PodAnnotation) (string, bool) {
return lsManager.GetSubnetName(podAnnotation.IPs)
func ZoneContainsPodSubnet(lsManager *logicalswitchmanager.LogicalSwitchManager, ips []*net.IPNet) (string, bool) {
return lsManager.GetSubnetName(ips)
}

// nodeContainsPodSubnet will return true if the node subnet annotation
Expand Down Expand Up @@ -267,7 +267,7 @@ func allocateSyncMigratablePodIPs(watchFactory *factory.WatchFactory, lsManager
if err != nil {
return nil, "", nil, nil
}
switchName, zoneContainsPodSubnet := ZoneContainsPodSubnet(lsManager, annotation)
switchName, zoneContainsPodSubnet := ZoneContainsPodSubnet(lsManager, annotation.IPs)
// If this zone do not own the subnet or the node that is passed
// do not match the switch, they should not be deallocated
if !zoneContainsPodSubnet || (nodeName != "" && switchName != nodeName) {
Expand Down Expand Up @@ -295,7 +295,7 @@ func AllocateSyncMigratablePodIPsOnZone(watchFactory *factory.WatchFactory, lsMa
// convenience, the host subnets might not provided in which case they might be
// parsed and returned if used.
func ZoneContainsPodSubnetOrUntracked(watchFactory *factory.WatchFactory, lsManager *logicalswitchmanager.LogicalSwitchManager, hostSubnets []*net.IPNet, annotation *util.PodAnnotation) ([]*net.IPNet, bool, error) {
_, local := ZoneContainsPodSubnet(lsManager, annotation)
_, local := ZoneContainsPodSubnet(lsManager, annotation.IPs)
if local {
return nil, true, nil
}
Expand Down
2 changes: 1 addition & 1 deletion go-controller/pkg/kubevirt/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ func EnsureLocalZonePodAddressesToNodeRoute(watchFactory *factory.WatchFactory,
return fmt.Errorf("failed reading local pod annotation: %v", err)
}

nodeOwningSubnet, _ := ZoneContainsPodSubnet(lsManager, podAnnotation)
nodeOwningSubnet, _ := ZoneContainsPodSubnet(lsManager, podAnnotation.IPs)
vmRunningAtNodeOwningSubnet := nodeOwningSubnet == pod.Spec.NodeName
if vmRunningAtNodeOwningSubnet {
// Point to point routing is no longer needed if vm
Expand Down
2 changes: 1 addition & 1 deletion go-controller/pkg/ovn/base_network_controller_pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ func (bnc *BaseNetworkController) ensurePodAnnotation(pod *kapi.Pod, nadName str
}
// The live migrated pods will have the pod annotation but the switch manager running
// at the node will not contain the switch as expected
_, zoneContainsPodSubnet := kubevirt.ZoneContainsPodSubnet(bnc.lsManager, podAnnotation)
_, zoneContainsPodSubnet := kubevirt.ZoneContainsPodSubnet(bnc.lsManager, podAnnotation.IPs)
return podAnnotation, zoneContainsPodSubnet, nil
}
podAnnotation, err := util.UnmarshalPodAnnotation(pod.Annotations, nadName)
Expand Down
16 changes: 8 additions & 8 deletions go-controller/pkg/ovn/ovn.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package ovn

import (
"bytes"
"errors"
"fmt"
"net"
"reflect"
Expand Down Expand Up @@ -279,11 +280,6 @@ func (oc *DefaultNetworkController) removeRemoteZonePod(pod *kapi.Pod) error {
}
}

podAnnotation, err := util.UnmarshalPodAnnotation(pod.Annotations, ovntypes.DefaultNetworkName)
if err != nil {
return fmt.Errorf("unable to unmarshal pod annotation to release IPs at removeRemoteZonePod: %v", err)
}

// while this check is only intended for local pods, we also need it for
// remote live migrated pods that might have been allocated from this zone
if oc.wasPodReleasedBeforeStartup(string(pod.UID), ovntypes.DefaultNetworkName) {
Expand All @@ -295,13 +291,17 @@ func (oc *DefaultNetworkController) removeRemoteZonePod(pod *kapi.Pod) error {
}

if err := oc.removeRemoteZonePodFromNamespaceAddressSet(pod); err != nil {
return fmt.Errorf("failed to remove the remote zone pod : %w", err)
return fmt.Errorf("failed to remove the remote zone pod: %w", err)
}

if kubevirt.IsPodLiveMigratable(pod) {
switchName, zoneContainsPodSubnet := kubevirt.ZoneContainsPodSubnet(oc.lsManager, podAnnotation)
ips, err := util.GetPodCIDRsWithFullMask(pod, oc.NetInfo)
if err != nil && !errors.Is(err, util.ErrNoPodIPFound) {
return fmt.Errorf("failed to get pod ips for the pod %s/%s: %w", pod.Namespace, pod.Name, err)
}
switchName, zoneContainsPodSubnet := kubevirt.ZoneContainsPodSubnet(oc.lsManager, ips)
if zoneContainsPodSubnet {
if err := oc.lsManager.ReleaseIPs(switchName, podAnnotation.IPs); err != nil {
if err := oc.lsManager.ReleaseIPs(switchName, ips); err != nil {
return err
}
}
Expand Down
62 changes: 62 additions & 0 deletions go-controller/pkg/ovn/pods_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"net"
"strings"
"sync"
"time"

"github.com/urfave/cli/v2"
Expand Down Expand Up @@ -2378,5 +2379,66 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() {
err := app.Run([]string{app.Name})
gomega.Expect(err).NotTo(gomega.HaveOccurred())
})

ginkgo.It("should handle a scheduled or failed remote pod with no IPs annotated", func() {
app.Action = func(ctx *cli.Context) error {
namespaceT := *newNamespace("namespace1")
t := newTPod(
"node1",
"10.128.1.0/24",
"10.128.1.2",
"10.128.1.1",
"myPod",
"10.128.1.3",
"0a:58:0a:80:01:03",
namespaceT.Name,
)
myPod := newPod(t.namespace, t.podName, t.nodeName, t.podIP)

// testing how a scheduled non-annotated pod is handled is
// tricky, let's settle with a failed pod which should be
// handled in a similar way
myPod.Status.Phase = v1.PodFailed

fakeOvn.startWithDBSetup(initialDB,
&v1.NamespaceList{
Items: []v1.Namespace{
namespaceT,
},
},
&v1.NodeList{
Items: []v1.Node{
*newNode(node1Name, "192.168.126.202/24"),
},
},
&v1.PodList{
Items: []v1.Pod{*myPod},
},
)

// initialize the localZoneNodes empty so the controller thinks
// the node is on a remote zone
fakeOvn.controller.localZoneNodes = &sync.Map{}

err := fakeOvn.controller.WatchNamespaces()
gomega.Expect(err).NotTo(gomega.HaveOccurred())
err = fakeOvn.controller.WatchPods()
gomega.Expect(err).NotTo(gomega.HaveOccurred())

// check that the pod is not being retried, it should have been
// handled synchronously and succesfully in WatchPods
podKey, err := retry.GetResourceKey(myPod)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
gomega.Expect(retry.CheckRetryObj(podKey, fakeOvn.controller.retryPods)).To(gomega.BeFalse())

// check that the namespace AS is kept empty
fakeOvn.asf.ExpectAddressSetWithIPs(namespaceT.Name, []string{})

return nil
}

err := app.Run([]string{app.Name})
gomega.Expect(err).NotTo(gomega.HaveOccurred())
})
})
})