Skip to content

Commit

Permalink
chore: the expected node count in the backend pool should be measured…
Browse files Browse the repository at this point in the history
… by the number of nodes hosting the service endpoints, not the number of nodes
  • Loading branch information
nilo19 committed Aug 31, 2023
1 parent 766ea33 commit f3eabac
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 7 deletions.
11 changes: 9 additions & 2 deletions pkg/provider/azure_local_services.go
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ func (az *Cloud) setUpEndpointSlicesInformer(informerFactory informers.SharedInf
key := strings.ToLower(fmt.Sprintf("%s/%s", newES.Namespace, svcName))
si, found := az.getLocalServiceInfo(key)
if !found {
klog.V(4).Infof("EndpointSlice %s/%s belongs to service %s, but the service is not a local service, skip updating load balancer backend pool", key, newES.Namespace, newES.Name)
klog.V(4).Infof("EndpointSlice %s/%s belongs to service %s, but the service is not a local service, skip updating load balancer backend pool", newES.Namespace, newES.Name, key)
return
}
lbName, ipFamily := si.lbName, si.ipFamily
Expand Down Expand Up @@ -471,12 +471,16 @@ func newServiceInfo(ipFamily, lbName string) *serviceInfo {

// getLocalServiceEndpointsNodeNames gets the node names that host all endpoints of the local service.
func (az *Cloud) getLocalServiceEndpointsNodeNames(service *v1.Service) (sets.Set[string], error) {
var ep *discovery_v1.EndpointSlice
var (
ep *discovery_v1.EndpointSlice
foundInCache bool
)
az.endpointSlicesCache.Range(func(key, value interface{}) bool {
endpointSlice := value.(*discovery_v1.EndpointSlice)
if strings.EqualFold(getServiceNameOfEndpointSlice(endpointSlice), service.Name) &&
strings.EqualFold(endpointSlice.Namespace, service.Namespace) {
ep = endpointSlice
foundInCache = true
return false
}
return true
Expand All @@ -499,6 +503,9 @@ func (az *Cloud) getLocalServiceEndpointsNodeNames(service *v1.Service) (sets.Se
if ep == nil {
return nil, fmt.Errorf("failed to find EndpointSlice for service %s/%s", service.Namespace, service.Name)
}
if !foundInCache {
az.endpointSlicesCache.Store(strings.ToLower(fmt.Sprintf("%s/%s", ep.Namespace, ep.Name)), ep)
}

var nodeNames []string
for _, endpoint := range ep.Endpoints {
Expand Down
43 changes: 38 additions & 5 deletions tests/e2e/network/multiple_standard_lb.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,12 +189,13 @@ var _ = Describe("Ensure LoadBalancer", Label(utils.TestSuiteLabelMultiSLB), fun
Expect(err).NotTo(HaveOccurred())
Expect(len(ips)).NotTo(BeZero())

nodes, err := utils.GetAgentNodes(cs)
By(fmt.Sprintf("Checking the node count in the local service backend pool to equal %d)", len(nodes)))
nodeNames, err := getDeploymentPodsNodeNames(cs, ns.Name, testDeploymentName)
Expect(err).NotTo(HaveOccurred())
By(fmt.Sprintf("Checking the node count in the local service backend pool to equal %d", len(nodeNames)))
clusterName := os.Getenv("CLUSTER_NAME")
Expect(err).NotTo(HaveOccurred())
expectedBPName := fmt.Sprintf("%s-%s", svc.Namespace, svc.Name)
err = checkNodeCountInBackendPoolByServiceIPs(tc, clusterName, expectedBPName, ips, len(nodes))
err = checkNodeCountInBackendPoolByServiceIPs(tc, clusterName, expectedBPName, ips, len(nodeNames))
Expect(err).NotTo(HaveOccurred())

By("Scaling the deployment to 3 replicas and then to 1")
Expand All @@ -221,10 +222,12 @@ var _ = Describe("Ensure LoadBalancer", Label(utils.TestSuiteLabelMultiSLB), fun
deployment.Spec.Replicas = pointer.Int32(5)
_, err = cs.AppsV1().Deployments(ns.Name).Update(context.Background(), deployment, metav1.UpdateOptions{})
Expect(err).NotTo(HaveOccurred())
nodeNames, err = getDeploymentPodsNodeNames(cs, ns.Name, testDeploymentName)
Expect(err).NotTo(HaveOccurred())
err = wait.PollUntilContextTimeout(context.Background(), 5*time.Second, 5*time.Minute, false, func(ctx context.Context) (bool, error) {
if err := checkNodeCountInBackendPoolByServiceIPs(tc, clusterName, expectedBPName, ips, len(nodes)); err != nil {
if err := checkNodeCountInBackendPoolByServiceIPs(tc, clusterName, expectedBPName, ips, len(nodeNames)); err != nil {
if strings.Contains(err.Error(), "expected node count") {
utils.Logf("Waiting for the node count in the backend pool to equal %d", len(nodes))
utils.Logf("Waiting for the node count in the backend pool to equal %d", len(nodeNames))
return false, nil
}
return false, err
Expand All @@ -235,6 +238,36 @@ var _ = Describe("Ensure LoadBalancer", Label(utils.TestSuiteLabelMultiSLB), fun
})
})

func getDeploymentPodsNodeNames(kubeClient clientset.Interface, namespace, deploymentName string) (map[string]bool, error) {
var (
podList *v1.PodList
res = make(map[string]bool)
err error
)
err = wait.PollUntilContextTimeout(context.Background(), 5*time.Second, 5*time.Minute, false, func(ctx context.Context) (bool, error) {
podList, err = kubeClient.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{})
if err != nil {
return false, err
}
for _, pod := range podList.Items {
if strings.HasPrefix(pod.Name, deploymentName) {
if pod.Spec.NodeName == "" {
utils.Logf("Waiting for pod %s to be running", pod.Name)
return false, nil
}
utils.Logf("Pod %s is running on node %s", pod.Name, pod.Spec.NodeName)
res[pod.Spec.NodeName] = true
}
}
return true, nil
})
if err != nil {
return nil, err
}

return res, nil
}

func checkNodeCountInBackendPoolByServiceIPs(tc *utils.AzureTestClient, expectedLBName, bpName string, svcIPs []string, expectedCount int) error {
for _, svcIP := range svcIPs {
lb := getAzureLoadBalancerFromPIP(tc, svcIP, tc.GetResourceGroup(), tc.GetResourceGroup())
Expand Down

0 comments on commit f3eabac

Please sign in to comment.