Merge pull request #1232 from dcbw/dont-list-pods-on-node-update
[release-4.11] Bug 2115481: master: don't list node pods on add/update unless necessary
openshift-merge-robot committed Aug 23, 2022
2 parents 2c605e0 + 451048c commit bc09444
Showing 2 changed files with 217 additions and 16 deletions.
35 changes: 19 additions & 16 deletions go-controller/pkg/ovn/master.go
@@ -1378,26 +1378,29 @@ func (oc *Controller) addUpdateNodeEvent(node *kapi.Node, nSyncs *nodeSyncs) err
 	}
 
 	// ensure pods that already exist on this node have their logical ports created
-	options := metav1.ListOptions{FieldSelector: fields.OneTermEqualSelector("spec.nodeName", node.Name).String()}
-	pods, err := oc.client.CoreV1().Pods(metav1.NamespaceAll).List(context.TODO(), options)
-	if err != nil {
-		klog.Errorf("Unable to list existing pods on node: %s, existing pods on this node may not function")
-	} else if nSyncs.syncNode { // do this only if its a new node add
-		klog.V(5).Infof("When adding node %s, found %d pods to add to retryPods", node.Name, len(pods.Items))
-		for _, pod := range pods.Items {
-			pod := pod
-			if util.PodCompleted(&pod) {
-				continue
+	// if per pod SNAT is being used, then l3 gateway config is required to be able to add pods
+	if _, gwFailed := oc.gatewaysFailed.Load(node.Name); !gwFailed || !config.Gateway.DisableSNATMultipleGWs {
+		if nSyncs.syncNode || nSyncs.syncGw { // do this only if it is a new node add or a gateway sync happened
+			options := metav1.ListOptions{FieldSelector: fields.OneTermEqualSelector("spec.nodeName", node.Name).String()}
+			pods, err := oc.client.CoreV1().Pods(metav1.NamespaceAll).List(context.TODO(), options)
+			if err != nil {
+				errs = append(errs, err)
+				klog.Errorf("Unable to list existing pods on node: %s, existing pods on this node may not function")
+			} else {
+				klog.V(5).Infof("When adding node %s, found %d pods to add to retryPods", node.Name, len(pods.Items))
+				for _, pod := range pods.Items {
+					pod := pod
+					if util.PodCompleted(&pod) {
+						continue
+					}
+					klog.V(5).Infof("Adding pod %s/%s to retryPods", pod.Namespace, pod.Name)
+					oc.retryPods.addRetryObj(&pod)
+				}
 			}
-			klog.V(5).Infof("Adding pod %s/%s to retryPods", pod.Namespace, pod.Name)
-			oc.retryPods.addRetryObj(&pod)
+			oc.retryPods.requestRetryObjs()
 		}
-		oc.retryPods.requestRetryObjs()
 	}
 
 	if len(errs) == 0 {
 		return nil
 	}
 	return kerrors.NewAggregate(errs)
 }

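The effect of the master.go change above: a node's pods are listed only when the node is genuinely new or its gateway configuration was just synced, and the listing is skipped when per-pod SNAT is in use but the gateway setup failed, since adding pods requires the L3 gateway config anyway. The following is a minimal, self-contained sketch of that gating, not the controller source; apart from syncNode/syncGw, the names are illustrative stand-ins:

// Minimal sketch (not the controller code) of the gating this commit introduces:
// a node's pods are listed only when the node is newly added or its gateway was
// just synced, and only when the gateway config is usable or per-pod SNAT is off.
package main

import "fmt"

type nodeSyncs struct {
	syncNode bool // set when the node is first added
	syncGw   bool // set when the node's gateway configuration was (re)synced
}

// shouldListPods mirrors the new condition in addUpdateNodeEvent; gatewayFailed and
// disableSNATMultipleGWs stand in for oc.gatewaysFailed.Load(node.Name) and
// config.Gateway.DisableSNATMultipleGWs.
func shouldListPods(n nodeSyncs, gatewayFailed, disableSNATMultipleGWs bool) bool {
	if gatewayFailed && disableSNATMultipleGWs {
		// per-pod SNAT needs the L3 gateway config; listing pods now would not help
		return false
	}
	return n.syncNode || n.syncGw
}

func main() {
	// Plain annotation update on an existing, healthy node: pods are not listed.
	fmt.Println(shouldListPods(nodeSyncs{}, false, false)) // false
	// Newly added node: list its pods once so their logical ports get created.
	fmt.Println(shouldListPods(nodeSyncs{syncNode: true}, false, false)) // true
}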
198 changes: 198 additions & 0 deletions go-controller/pkg/ovn/master_test.go
@@ -7,6 +7,7 @@ import (
"reflect"
"strconv"
"sync"
"sync/atomic"
"testing"

"github.com/onsi/ginkgo"
@@ -30,8 +31,10 @@
kapi "k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/kubernetes/fake"
clienttesting "k8s.io/client-go/testing"
"k8s.io/client-go/tools/record"
)

@@ -1234,6 +1237,201 @@ var _ = ginkgo.Describe("Gateway Init Operations", func() {
gomega.Expect(err).NotTo(gomega.HaveOccurred())
})

ginkgo.It("does not list node's pods when updating node after successfully adding the node", func() {
app.Action = func(ctx *cli.Context) error {
node1 := tNode{
Name: "node1",
NodeIP: "1.2.3.4",
NodeLRPMAC: "0a:58:0a:01:01:01",
LrpMAC: "0a:58:64:40:00:02",
LrpIP: "100.64.0.2",
LrpIPv6: "fd98::2",
DrLrpIP: "100.64.0.1",
PhysicalBridgeMAC: "11:22:33:44:55:66",
SystemID: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6",
NodeSubnet: "10.1.1.0/24",
GWRouter: types.GWRouterPrefix + "node1",
GatewayRouterIPMask: "172.16.16.2/24",
GatewayRouterIP: "172.16.16.2",
GatewayRouterNextHop: "172.16.16.1",
PhysicalBridgeName: "br-eth0",
NodeGWIP: "10.1.1.1/24",
NodeMgmtPortIP: "10.1.1.2",
NodeMgmtPortMAC: "0a:58:0a:01:01:02",
DnatSnatIP: "169.254.0.1",
}

testNode := v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: node1.Name,
},
Status: kapi.NodeStatus{
Addresses: []kapi.NodeAddress{
{
Type: kapi.NodeExternalIP,
Address: node1.NodeIP,
},
},
},
}

kubeFakeClient := fake.NewSimpleClientset(&v1.NodeList{
Items: []v1.Node{testNode},
})
egressFirewallFakeClient := &egressfirewallfake.Clientset{}
egressIPFakeClient := &egressipfake.Clientset{}
fakeClient := &util.OVNClientset{
KubeClient: kubeFakeClient,
EgressIPClient: egressIPFakeClient,
EgressFirewallClient: egressFirewallFakeClient,
}

_, err := config.InitConfig(ctx, nil, nil)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
config.Kubernetes.HostNetworkNamespace = ""
nodeAnnotator := kube.NewNodeAnnotator(&kube.Kube{kubeFakeClient, egressIPFakeClient, egressFirewallFakeClient, nil}, testNode.Name)
ifaceID := node1.PhysicalBridgeName + "_" + node1.Name
vlanID := uint(1024)
l3Config := &util.L3GatewayConfig{
Mode: config.GatewayModeShared,
ChassisID: node1.SystemID,
InterfaceID: ifaceID,
MACAddress: ovntest.MustParseMAC(node1.PhysicalBridgeMAC),
IPAddresses: ovntest.MustParseIPNets(node1.GatewayRouterIPMask),
NextHops: ovntest.MustParseIPs(node1.GatewayRouterNextHop),
NodePortEnable: true,
VLANID: &vlanID,
}
err = util.SetL3GatewayConfig(nodeAnnotator, l3Config)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
err = util.SetNodeManagementPortMACAddress(nodeAnnotator, ovntest.MustParseMAC(node1.NodeMgmtPortMAC))
gomega.Expect(err).NotTo(gomega.HaveOccurred())
err = util.SetNodeHostSubnetAnnotation(nodeAnnotator, ovntest.MustParseIPNets(node1.NodeSubnet))
gomega.Expect(err).NotTo(gomega.HaveOccurred())
err = util.SetNodeHostAddresses(nodeAnnotator, sets.NewString("9.9.9.9"))
gomega.Expect(err).NotTo(gomega.HaveOccurred())
err = nodeAnnotator.Run()
gomega.Expect(err).NotTo(gomega.HaveOccurred())

f, err = factory.NewMasterWatchFactory(fakeClient)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
err = f.Start()
gomega.Expect(err).NotTo(gomega.HaveOccurred())

expectedClusterLBGroup := &nbdb.LoadBalancerGroup{
Name: types.ClusterLBGroupName,
UUID: types.ClusterLBGroupName + "-UUID",
}
expectedOVNClusterRouter := &nbdb.LogicalRouter{
UUID: types.OVNClusterRouter + "-UUID",
Name: types.OVNClusterRouter,
}
expectedNodeSwitch := &nbdb.LogicalSwitch{
UUID: node1.Name + "-UUID",
Name: node1.Name,
OtherConfig: map[string]string{"subnet": node1.NodeSubnet},
LoadBalancerGroup: []string{expectedClusterLBGroup.UUID},
}
expectedClusterRouterPortGroup := &nbdb.PortGroup{
UUID: types.ClusterRtrPortGroupName + "-UUID",
Name: types.ClusterRtrPortGroupName,
ExternalIDs: map[string]string{
"name": types.ClusterRtrPortGroupName,
},
}
expectedClusterPortGroup := &nbdb.PortGroup{
UUID: types.ClusterPortGroupName + "-UUID",
Name: types.ClusterPortGroupName,
ExternalIDs: map[string]string{
"name": types.ClusterPortGroupName,
},
}
dbSetup := libovsdbtest.TestSetup{
NBData: []libovsdbtest.TestData{
&nbdb.LogicalSwitch{
UUID: types.OVNJoinSwitch + "-UUID",
Name: types.OVNJoinSwitch,
},
expectedNodeSwitch,
expectedOVNClusterRouter,
expectedClusterRouterPortGroup,
expectedClusterPortGroup,
expectedClusterLBGroup,
},
}
var libovsdbOvnNBClient, libovsdbOvnSBClient libovsdbclient.Client
libovsdbOvnNBClient, libovsdbOvnSBClient, libovsdbCleanup, err = libovsdbtest.NewNBSBTestHarness(dbSetup)
gomega.Expect(err).NotTo(gomega.HaveOccurred())

expectedDatabaseState := []libovsdbtest.TestData{}
expectedDatabaseState = addNodeLogicalFlows(expectedDatabaseState, expectedOVNClusterRouter, expectedNodeSwitch, expectedClusterRouterPortGroup, expectedClusterPortGroup, &node1)

clusterController := NewOvnController(fakeClient, f, stopChan, addressset.NewFakeAddressSetFactory(),
libovsdbOvnNBClient, libovsdbOvnSBClient,
record.NewFakeRecorder(0))
clusterController.loadBalancerGroupUUID = expectedClusterLBGroup.UUID
gomega.Expect(clusterController).NotTo(gomega.BeNil())
clusterController.defaultGatewayCOPPUUID, err = EnsureDefaultCOPP(libovsdbOvnNBClient)
gomega.Expect(err).NotTo(gomega.HaveOccurred())

clusterController.SCTPSupport = true
clusterController.joinSwIPManager, _ = lsm.NewJoinLogicalSwitchIPManager(clusterController.nbClient, expectedNodeSwitch.UUID, []string{node1.Name})
_, _ = clusterController.joinSwIPManager.EnsureJoinLRPIPs(types.OVNClusterRouter)
// Let the real code run and ensure OVN database sync
gomega.Expect(clusterController.WatchNodes()).To(gomega.Succeed())
// ensure db is consistent
subnet := ovntest.MustParseIPNet(node1.NodeSubnet)

var clusterSubnets []*net.IPNet
for _, clusterSubnet := range config.Default.ClusterSubnets {
clusterSubnets = append(clusterSubnets, clusterSubnet.CIDR)
}
joinLRPIP, joinLRNetwork, _ := net.ParseCIDR(node1.LrpIP + "/16")
dLRPIP, dLRPNetwork, _ := net.ParseCIDR(node1.DrLrpIP + "/16")

joinLRPIPs := &net.IPNet{
IP: joinLRPIP,
Mask: joinLRNetwork.Mask,
}
dLRPIPs := &net.IPNet{
IP: dLRPIP,
Mask: dLRPNetwork.Mask,
}

skipSnat := false
expectedDatabaseState = generateGatewayInitExpectedNB(expectedDatabaseState, expectedOVNClusterRouter, expectedNodeSwitch, node1.Name, clusterSubnets, []*net.IPNet{subnet}, l3Config, []*net.IPNet{joinLRPIPs}, []*net.IPNet{dLRPIPs}, skipSnat, node1.NodeMgmtPortIP)
gomega.Eventually(libovsdbOvnNBClient).Should(libovsdbtest.HaveData(expectedDatabaseState))

ginkgo.By("modifying the node and triggering an update")

var podsWereListed uint32
kubeFakeClient.PrependReactor("list", "pods", func(action clienttesting.Action) (handled bool, ret runtime.Object, err error) {
atomic.StoreUint32(&podsWereListed, 1)
podList := &v1.PodList{}
return true, podList, nil
})

// modify the node and trigger an update
err = nodeAnnotator.Set("foobar", "baz")
gomega.Expect(err).NotTo(gomega.HaveOccurred())
err = nodeAnnotator.Run()
gomega.Expect(err).NotTo(gomega.HaveOccurred())
gomega.Consistently(func() uint32 {
return atomic.LoadUint32(&podsWereListed)
}, 10).Should(gomega.Equal(uint32(0)))

return nil
}

err := app.Run([]string{
app.Name,
"-cluster-subnets=" + clusterCIDR,
"--init-gateways",
"--nodeport",
"--disable-snat-multiple-gws=false",
})
gomega.Expect(err).NotTo(gomega.HaveOccurred())
})

ginkgo.It("use node retry with updating a node", func() {

app.Action = func(ctx *cli.Context) error {
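The mechanism behind the new test above: a reactor prepended to the fake Kubernetes client flips a flag whenever pods are listed, and gomega.Consistently then asserts the flag stays zero after a pure node-annotation update. Below is a stripped-down, standalone sketch of that interception pattern using client-go's fake clientset; it is not part of the repository, and the package layout is illustrative:

// Standalone sketch of the fake-client reactor technique used by the test:
// PrependReactor intercepts "list pods" calls so a test can assert that a code
// path never lists pods.
package main

import (
	"context"
	"fmt"
	"sync/atomic"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/client-go/kubernetes/fake"
	clienttesting "k8s.io/client-go/testing"
)

func main() {
	client := fake.NewSimpleClientset()

	var podsWereListed uint32
	client.PrependReactor("list", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
		atomic.StoreUint32(&podsWereListed, 1)
		return true, &v1.PodList{}, nil // handled: return an empty pod list
	})

	// Any List call through the fake client now trips the flag.
	_, _ = client.CoreV1().Pods(metav1.NamespaceAll).List(context.TODO(), metav1.ListOptions{})
	fmt.Println(atomic.LoadUint32(&podsWereListed)) // 1
}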
