Skip to content

Commit

Permalink
Fix standalone VM EnsureBackendPoolDeleted
Browse files Browse the repository at this point in the history
* Fix ensureBackendPoolDeleted for standalone VM
* Trigger Service reconcile for exclude-from-external-load-balancers e2e
test
* Refactor e2e
* Add logs for exclude-from-external-load-balancers

Signed-off-by: Zhecheng Li <zhechengli@microsoft.com>
  • Loading branch information
lzhecheng committed Jul 4, 2023
1 parent b83f26b commit 452b9c3
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 33 deletions.
3 changes: 3 additions & 0 deletions pkg/provider/azure.go
Original file line number Diff line number Diff line change
Expand Up @@ -1229,9 +1229,11 @@ func (az *Cloud) updateNodeCaches(prevNode, newNode *v1.Node) {
switch {
case !isNodeManagedByCloudProvider:
az.excludeLoadBalancerNodes.Insert(newNode.ObjectMeta.Name)
klog.V(4).Infof("excluding Node %q from LoadBalancer because it is not managed by cloud provider", newNode.ObjectMeta.Name)

case hasExcludeBalancerLabel:
az.excludeLoadBalancerNodes.Insert(newNode.ObjectMeta.Name)
klog.V(4).Infof("excluding Node %q from LoadBalancer because it has exclude-from-external-load-balancers label", newNode.ObjectMeta.Name)

case !isNodeReady(newNode) && nodemanager.GetCloudTaint(newNode.Spec.Taints) == nil:
// If not in ready state and not a newly created node, add to excludeLoadBalancerNodes cache.
Expand All @@ -1240,6 +1242,7 @@ func (az *Cloud) updateNodeCaches(prevNode, newNode *v1.Node) {
// VMSS API calls and not provoke VMScaleSetActiveModelsCountLimitReached.
// (https://github.com/kubernetes-sigs/cloud-provider-azure/issues/851)
az.excludeLoadBalancerNodes.Insert(newNode.ObjectMeta.Name)
klog.V(4).Infof("excluding Node %q from LoadBalancer because it is not in ready state or a newly created one", newNode.ObjectMeta.Name)

default:
// Nodes not falling into the three cases above are valid backends and
Expand Down
6 changes: 3 additions & 3 deletions pkg/provider/azure_loadbalancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,15 +117,15 @@ func getPublicIPDomainNameLabel(service *v1.Service) (string, bool) {
// reconcileService reconciles the LoadBalancer service. It returns the LoadBalancerStatus on success.
func (az *Cloud) reconcileService(ctx context.Context, clusterName string, service *v1.Service, nodes []*v1.Node) (*v1.LoadBalancerStatus, error) {
serviceName := getServiceName(service)
resourceBaseName := az.GetLoadBalancerName(context.TODO(), "", service)
klog.V(2).Infof("reconcileService: Start reconciling Service %q with its resource basename %q", serviceName, resourceBaseName)

lb, err := az.reconcileLoadBalancer(clusterName, service, nodes, true /* wantLb */)
if err != nil {
klog.Errorf("reconcileLoadBalancer(%s) failed: %v", serviceName, err)
return nil, err
}

resourceBaseName := az.GetLoadBalancerName(context.TODO(), "", service)
klog.V(2).Infof("reconcileService: Start reconciling Service %q with its resource basename %q", serviceName, resourceBaseName)

lbStatus, lbIPsPrimaryPIPs, fipConfigs, err := az.getServiceLoadBalancerStatus(service, lb)
if err != nil {
klog.Errorf("getServiceLoadBalancerStatus(%s) failed: %v", serviceName, err)
Expand Down
3 changes: 2 additions & 1 deletion pkg/provider/azure_standard.go
Original file line number Diff line number Diff line change
Expand Up @@ -1149,7 +1149,8 @@ func (as *availabilitySet) EnsureBackendPoolDeleted(service *v1.Service, backend
return false, fmt.Errorf("EnsureBackendPoolDeleted: failed to parse the VMAS ID %s: %w", vmasID, err)
}
// Only remove nodes belonging to the specified vmSet from basic LB backend pools.
if !strings.EqualFold(vmasName, vmSetName) {
// If vmasID is empty, the node is a standalone VM.
if vmasID != "" && !strings.EqualFold(vmasName, vmSetName) {
klog.V(2).Infof("EnsureBackendPoolDeleted: skipping the node %s belonging to another vm set %s", nodeName, vmasName)
continue
}
Expand Down
76 changes: 56 additions & 20 deletions tests/e2e/network/ensureloadbalancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -649,24 +649,32 @@ var _ = Describe("Ensure LoadBalancer", Label(utils.TestSuiteLabelLB), func() {
})

It("should support node label `node.kubernetes.io/exclude-from-external-load-balancers`", func() {
if os.Getenv(utils.AKSTestCCM) == "" {
Skip("Skip this test case for non-AKS test")
}

label := "node.kubernetes.io/exclude-from-external-load-balancers"
By("Checking the number of the node pools")
nodes, err := utils.GetAgentNodes(cs)
Expect(err).NotTo(HaveOccurred())
initNodepoolNodeMap := utils.GetNodepoolNodeMap(&nodes)
if len(initNodepoolNodeMap) != 1 {
Skip("single node pool is needed in this scenario")
if os.Getenv(utils.AKSTestCCM) != "" {
// AKS
initNodepoolNodeMap := utils.GetNodepoolNodeMap(&nodes)
if len(initNodepoolNodeMap) != 1 {
Skip("single node pool is needed in this scenario")
}
} else {
// CAPZ
if os.Getenv(utils.LBBackendPoolConfigType) == "nodeIP" {
Skip("Skip when LB backendpool config type is nodeIP")
}
}

By("Creating a service to trigger the LB reconcile")
service := utils.CreateLoadBalancerServiceManifest(testServiceName, nil, labels, ns.Name, ports)
_, err = cs.CoreV1().Services(ns.Name).Create(context.TODO(), service, metav1.CreateOptions{})
Expect(err).NotTo(HaveOccurred())
publicIPs, err := utils.WaitServiceExposureAndValidateConnectivity(cs, tc.IPFamily, ns.Name, testServiceName, []string{})
defer func() {
deleteSvcErr := utils.DeleteService(cs, ns.Name, testServiceName)
Expect(deleteSvcErr).NotTo(HaveOccurred())
}()
Expect(err).NotTo(HaveOccurred())
Expect(len(publicIPs)).NotTo(BeZero())
publicIP := publicIPs[0]
Expand All @@ -683,27 +691,30 @@ var _ = Describe("Ensure LoadBalancer", Label(utils.TestSuiteLabelLB), func() {
} else {
lbBackendPoolIPConfigsCount = len(*(*lb.BackendAddressPools)[getLBBackendPoolIndex(lb)].BackendIPConfigurations)
}
nodes, err = utils.GetAgentNodes(cs)
Expect(err).NotTo(HaveOccurred())
Expect(lbBackendPoolIPConfigsCount).To(Equal(len(nodes)))
utils.Logf("Initial node number in the LB backend pool is %d", lbBackendPoolIPConfigsCount)
nodeToLabel := nodes[0]

By("Labeling node")
node, err := utils.LabelNode(cs, &nodes[0], label, false)
By(fmt.Sprintf("Labeling node %q", nodeToLabel.Name))
node, err := utils.LabelNode(cs, &nodeToLabel, label, false)
Expect(err).NotTo(HaveOccurred())
addDummyAnnotationWithServiceName(cs, ns.Name, testServiceName)
var expectedCount int
if len(nodes) == 1 {
expectedCount = 1
} else {
expectedCount = len(nodes) - 1
}

err = waitForNodesInLBBackendPool(tc, publicIP, expectedCount)
Expect(err).NotTo(HaveOccurred())

By("Unlabeling node")
By(fmt.Sprintf("Unlabeling node %q", nodeToLabel.Name))
node, err = utils.GetNode(cs, node.Name)
Expect(err).NotTo(HaveOccurred())
_, err = utils.LabelNode(cs, node, label, true)
Expect(err).NotTo(HaveOccurred())
addDummyAnnotationWithServiceName(cs, ns.Name, testServiceName)
err = waitForNodesInLBBackendPool(tc, publicIP, len(nodes))
Expect(err).NotTo(HaveOccurred())
})
Expand Down Expand Up @@ -1019,18 +1030,37 @@ var _ = Describe("EnsureLoadBalancer should not update any resources when servic
})
})

func updateServiceAndCompareEtags(tc *utils.AzureTestClient, cs clientset.Interface, ns *v1.Namespace, service *v1.Service, ip string, isInternal bool) {
utils.Logf("Retrieving etags from resources")
lbEtag, nsgEtag, pipEtag := getResourceEtags(tc, ip, cloudprovider.DefaultLoadBalancerName(service), isInternal)
// addDummyAnnotationWithServiceName looks up the Service namespace/serviceName
// through the client set and adds a dummy annotation to it, which triggers a
// Service reconciliation. The lookup (and the update performed by the helper)
// must succeed; any error fails the surrounding Ginkgo spec via Expect.
func addDummyAnnotationWithServiceName(cs clientset.Interface, namespace string, serviceName string) {
	svc, getErr := cs.CoreV1().Services(namespace).Get(context.TODO(), serviceName, metav1.GetOptions{})
	Expect(getErr).NotTo(HaveOccurred())
	addDummyAnnotationWithService(cs, namespace, svc)
}

utils.Logf("Adding a dummy annotation to trigger a second service reconciliation")
// addDummyAnnotationWithService adds a fresh dummy annotation to the given
// Service and pushes the update to the API server so the cloud provider
// re-reconciles the Service. Each call picks the first unused key of the form
// "dummy-annotation<i>" so that repeated calls keep producing a visible change
// (a no-op annotation update would not trigger reconciliation).
//
// The span as scraped contained stale pre-change diff lines (an unconditional
// write to "dummy-annotation" and an Update against ns.Name) duplicating the
// updated statements; this is the coherent post-change version.
func addDummyAnnotationWithService(cs clientset.Interface, namespace string, service *v1.Service) {
	utils.Logf("Adding a dummy annotation to trigger Service reconciliation")
	Expect(service).NotTo(BeNil())
	annotation := service.GetAnnotations()
	// GetAnnotations returns nil for a Service with no annotations; writing to
	// a nil map would panic, so allocate one first.
	if annotation == nil {
		annotation = make(map[string]string)
	}
	// e2e test should not have 100+ dummy annotations.
	for i := 0; i < 100; i++ {
		key := "dummy-annotation" + strconv.Itoa(i)
		if _, ok := annotation[key]; !ok {
			annotation[key] = "dummy"
			break
		}
	}
	service = updateServiceAnnotation(service, annotation)
	utils.Logf("Service's annotations: %v", annotation)
	_, err := cs.CoreV1().Services(namespace).Update(context.TODO(), service, metav1.UpdateOptions{})
	Expect(err).NotTo(HaveOccurred())
}

func updateServiceAndCompareEtags(tc *utils.AzureTestClient, cs clientset.Interface, ns *v1.Namespace, service *v1.Service, ip string, isInternal bool) {
utils.Logf("Retrieving etags from resources")
lbEtag, nsgEtag, pipEtag := getResourceEtags(tc, ip, cloudprovider.DefaultLoadBalancerName(service), isInternal)

addDummyAnnotationWithService(cs, ns.Name, service)
ips, err := utils.WaitServiceExposureAndValidateConnectivity(cs, tc.IPFamily, ns.Name, testServiceName, []string{})
Expect(err).NotTo(HaveOccurred())
Expect(len(ips)).NotTo(BeZero())
Expand Down Expand Up @@ -1135,21 +1165,27 @@ func waitForNodesInLBBackendPool(tc *utils.AzureTestClient, ip string, expectedN
lb := getAzureLoadBalancerFromPIP(tc, ip, tc.GetResourceGroup(), "")
lbBackendPoolIPConfigs := (*lb.BackendAddressPools)[getLBBackendPoolIndex(lb)].BackendIPConfigurations
ipConfigNum := 0
backendIPConfigIDs := []string{}
if lbBackendPoolIPConfigs != nil {
if utils.IsAutoscalingAKSCluster() {
for _, ipconfig := range *lbBackendPoolIPConfigs {
if !strings.Contains(*ipconfig.ID, utils.SystemPool) {
backendIPConfigIDs = append(backendIPConfigIDs, pointer.StringDeref(ipconfig.ID, ""))
ipConfigNum++
}
}
} else {
ipConfigNum = len(*lbBackendPoolIPConfigs)
for _, ipconfig := range *lbBackendPoolIPConfigs {
backendIPConfigIDs = append(backendIPConfigIDs, pointer.StringDeref(ipconfig.ID, ""))
}
}
}
if expectedNum == ipConfigNum {
utils.Logf("Number of IP configs matches expected number %d. Success", expectedNum)
return true, nil
}
utils.Logf("Number of IP configs: %d in the LB backend pool, expected %d, will retry soon", ipConfigNum, expectedNum)
utils.Logf("Number of IP configs: %d in the LB backend pool, expected %d, will retry soon, backend IP config IDs: %q", ipConfigNum, expectedNum, backendIPConfigIDs)
return false, nil
})
}
Expand Down
9 changes: 0 additions & 9 deletions tests/e2e/utils/azure_auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,6 @@ const (
ClusterLocationEnv = "AZURE_LOCATION"
ClusterEnvironment = "AZURE_ENVIRONMENT"
LoadBalancerSkuEnv = "AZURE_LOADBALANCER_SKU"
// If "TEST_CCM" is true, the test is running on a CAPZ cluster.
CAPZTestCCM = "TEST_CCM"
// If "E2E_ON_AKS_CLUSTER" is true, the test is running on an AKS cluster.
AKSTestCCM = "E2E_ON_AKS_CLUSTER"
AKSClusterType = "CLUSTER_TYPE"
// If "INGEST_TEST_RESULT" is true, the test result needs to be ingested into Kusto.
IngestTestResult = "INGEST_TEST_RESULT"

TrueValue = "true"
)

// AzureAuthConfig holds auth related part of cloud config
Expand Down
12 changes: 12 additions & 0 deletions tests/e2e/utils/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,16 @@ const (
TestSuiteLabelLB = "LB"
TestSuiteLabelMultiPorts = "Multi-Ports"
TestSuiteLabelNSG = "NSG"

// If "TEST_CCM" is true, the test is running on a CAPZ cluster.
CAPZTestCCM = "TEST_CCM"
// If "E2E_ON_AKS_CLUSTER" is true, the test is running on an AKS cluster.
AKSTestCCM = "E2E_ON_AKS_CLUSTER"
AKSClusterType = "CLUSTER_TYPE"
// If "INGEST_TEST_RESULT" is true, the test result needs to be ingested into Kusto.
IngestTestResult = "INGEST_TEST_RESULT"
// LB backendpool config type, may be nodeIP
LBBackendPoolConfigType = "LB_BACKEND_POOL_CONFIG_TYPE"

TrueValue = "true"
)

0 comments on commit 452b9c3

Please sign in to comment.