Fix standalone VM EnsureBackendPoolDeleted #4217

Merged
Changes from all commits
pkg/provider/azure.go — 5 changes: 4 additions & 1 deletion
@@ -1229,9 +1229,11 @@ func (az *Cloud) updateNodeCaches(prevNode, newNode *v1.Node) {
        switch {
        case !isNodeManagedByCloudProvider:
            az.excludeLoadBalancerNodes.Insert(newNode.ObjectMeta.Name)
+           klog.V(6).Infof("excluding Node %q from LoadBalancer because it is not managed by cloud provider", newNode.ObjectMeta.Name)

        case hasExcludeBalancerLabel:
            az.excludeLoadBalancerNodes.Insert(newNode.ObjectMeta.Name)
+           klog.V(6).Infof("excluding Node %q from LoadBalancer because it has exclude-from-external-load-balancers label", newNode.ObjectMeta.Name)

        case !isNodeReady(newNode) && nodemanager.GetCloudTaint(newNode.Spec.Taints) == nil:
            // If not in ready state and not a newly created node, add to excludeLoadBalancerNodes cache.
@@ -1240,6 +1242,7 @@ func (az *Cloud) updateNodeCaches(prevNode, newNode *v1.Node) {
            // VMSS API calls and not provoke VMScaleSetActiveModelsCountLimitReached.
            // (https://github.com/kubernetes-sigs/cloud-provider-azure/issues/851)
            az.excludeLoadBalancerNodes.Insert(newNode.ObjectMeta.Name)
+           klog.V(6).Infof("excluding Node %q from LoadBalancer because it is not in ready state or a newly created one", newNode.ObjectMeta.Name)

        default:
            // Nodes not falling into the three cases above are valid backends and
@@ -1253,7 +1256,7 @@ func (az *Cloud) updateNodeCaches(prevNode, newNode *v1.Node) {
                az.nodePrivateIPs[newNode.Name] = sets.New[string]()
            }

-           klog.V(4).Infof("adding IP address %s of the node %s", address, newNode.Name)
+           klog.V(6).Infof("adding IP address %s of the node %s", address, newNode.Name)
            az.nodePrivateIPs[newNode.Name].Insert(address)
        }
    }
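
The klog.V(6) lines added above make each exclusion branch observable in the logs. For reference, a condensed sketch of the decision order — simplified booleans stand in for the provider's actual node checks, so treat this as illustrative only:

package main

import "fmt"

// excludeReason mirrors the order of the switch in updateNodeCaches:
// unmanaged nodes first, then the exclude label, then not-ready nodes
// that are past initial provisioning (no cloud taint left). Sketch only.
func excludeReason(managed, hasExcludeLabel, ready, hasCloudTaint bool) (string, bool) {
    switch {
    case !managed:
        return "not managed by cloud provider", true
    case hasExcludeLabel:
        return "has exclude-from-external-load-balancers label", true
    case !ready && !hasCloudTaint:
        return "not ready and not newly created", true
    default:
        return "", false // valid LB backend
    }
}

func main() {
    fmt.Println(excludeReason(true, false, false, true)) // "" false: still provisioning, keep it
    fmt.Println(excludeReason(true, true, true, false))  // excluded by label
}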
pkg/provider/azure_loadbalancer.go — 6 changes: 3 additions & 3 deletions
@@ -117,15 +117,15 @@ func getPublicIPDomainNameLabel(service *v1.Service) (string, bool) {
// reconcileService reconcile the LoadBalancer service. It returns LoadBalancerStatus on success.
func (az *Cloud) reconcileService(ctx context.Context, clusterName string, service *v1.Service, nodes []*v1.Node) (*v1.LoadBalancerStatus, error) {
    serviceName := getServiceName(service)
-   resourceBaseName := az.GetLoadBalancerName(context.TODO(), "", service)
-   klog.V(2).Infof("reconcileService: Start reconciling Service %q with its resource basename %q", serviceName, resourceBaseName)
-
    lb, err := az.reconcileLoadBalancer(clusterName, service, nodes, true /* wantLb */)
    if err != nil {
        klog.Errorf("reconcileLoadBalancer(%s) failed: %v", serviceName, err)
        return nil, err
    }

+   resourceBaseName := az.GetLoadBalancerName(context.TODO(), "", service)
+   klog.V(2).Infof("reconcileService: Start reconciling Service %q with its resource basename %q", serviceName, resourceBaseName)
+
    lbStatus, lbIPsPrimaryPIPs, fipConfigs, err := az.getServiceLoadBalancerStatus(service, lb)
    if err != nil {
        klog.Errorf("getServiceLoadBalancerStatus(%s) failed: %v", serviceName, err)
pkg/provider/azure_loadbalancer_backendpool.go — 2 changes: 1 addition & 1 deletion
@@ -623,7 +623,7 @@ func (bi *backendPoolTypeNodeIP) ReconcileBackendPools(clusterName string, servi
    var nodeIPAddressesToBeDeleted []string
    for nodeName := range bi.excludeLoadBalancerNodes {
        for ip := range bi.nodePrivateIPs[nodeName] {
-           klog.V(2).Infof("bi.ReconcileBackendPools for service (%s): found unwanted node private IP %s, decoupling it from the LB %s", serviceName, ip, lbName)
+           klog.V(2).Infof("bi.ReconcileBackendPools for service (%s): found unwanted node private IP %s, decouple it from the LB %s", serviceName, ip, lbName)
            nodeIPAddressesToBeDeleted = append(nodeIPAddressesToBeDeleted, ip)
        }
    }
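
The log wording aside, this loop is what ties the updateNodeCaches change to backend pool reconciliation: for IP-based backend pools, every private IP cached for an excluded node is queued for removal from the LB. A minimal sketch of that collection step, with plain maps standing in for the provider's cached sets:

package main

import "fmt"

// collectUnwantedIPs sketches the loop above: every cached private IP of an
// excluded node is queued for removal from the LB backend pool.
func collectUnwantedIPs(excludedNodes map[string]struct{}, nodePrivateIPs map[string][]string) []string {
    var toDelete []string
    for nodeName := range excludedNodes {
        toDelete = append(toDelete, nodePrivateIPs[nodeName]...)
    }
    return toDelete
}

func main() {
    excluded := map[string]struct{}{"node-1": {}}
    ips := map[string][]string{
        "node-1": {"10.240.0.4"},
        "node-2": {"10.240.0.5"},
    }
    fmt.Println(collectUnwantedIPs(excluded, ips)) // [10.240.0.4]
}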
pkg/provider/azure_standard.go — 3 changes: 2 additions & 1 deletion
@@ -1149,7 +1149,8 @@ func (as *availabilitySet) EnsureBackendPoolDeleted(service *v1.Service, backend
            return false, fmt.Errorf("EnsureBackendPoolDeleted: failed to parse the VMAS ID %s: %w", vmasID, err)
        }
        // Only remove nodes belonging to specified vmSet to basic LB backends.
-       if !strings.EqualFold(vmasName, vmSetName) {
+       // If vmasID is empty, then it is a standalone VM.
+       if vmasID != "" && !strings.EqualFold(vmasName, vmSetName) {
            klog.V(2).Infof("EnsureBackendPoolDeleted: skipping the node %s belonging to another vm set %s", nodeName, vmasName)
            continue
        }
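
This guard is the heart of the fix. A standalone VM belongs to no availability set, so its vmasID is empty; under the old condition the EqualFold comparison always failed for such nodes, the loop hit continue, and the node's entry was never removed from the backend pool. A runnable sketch of the new guard — illustrative only, not the provider's actual code:

package main

import (
    "fmt"
    "strings"
)

// shouldSkipNode mirrors the guard added in EnsureBackendPoolDeleted: a node
// is skipped only when it belongs to a *different* VM availability set.
// Standalone VMs (empty VMAS ID) must fall through and be processed.
func shouldSkipNode(vmasID, vmasName, vmSetName string) bool {
    return vmasID != "" && !strings.EqualFold(vmasName, vmSetName)
}

func main() {
    // VM in another availability set: skipped, as before.
    fmt.Println(shouldSkipNode("/subscriptions/.../availabilitySets/other", "other", "agentpool")) // true
    // Standalone VM: processed, so its backend pool entry gets removed.
    fmt.Println(shouldSkipNode("", "", "agentpool")) // false
}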
tests/e2e/network/ensureloadbalancer.go — 112 changes: 78 additions & 34 deletions
@@ -649,61 +649,79 @@ var _ = Describe("Ensure LoadBalancer", Label(utils.TestSuiteLabelLB), func() {
    })

    It("should support node label `node.kubernetes.io/exclude-from-external-load-balancers`", func() {
-       if os.Getenv(utils.AKSTestCCM) == "" {
-           Skip("Skip this test case for non-AKS test")
-       }
-
        label := "node.kubernetes.io/exclude-from-external-load-balancers"
        By("Checking the number of the node pools")
        nodes, err := utils.GetAgentNodes(cs)
        Expect(err).NotTo(HaveOccurred())
-       initNodepoolNodeMap := utils.GetNodepoolNodeMap(&nodes)
-       if len(initNodepoolNodeMap) != 1 {
-           Skip("single node pool is needed in this scenario")
+       if os.Getenv(utils.AKSTestCCM) != "" {
+           // AKS
+           initNodepoolNodeMap := utils.GetNodepoolNodeMap(&nodes)
+           if len(initNodepoolNodeMap) != 1 {
+               Skip("single node pool is needed in this scenario")
+           }
        }

        By("Creating a service to trigger the LB reconcile")
        service := utils.CreateLoadBalancerServiceManifest(testServiceName, nil, labels, ns.Name, ports)
        _, err = cs.CoreV1().Services(ns.Name).Create(context.TODO(), service, metav1.CreateOptions{})
        Expect(err).NotTo(HaveOccurred())
        publicIPs, err := utils.WaitServiceExposureAndValidateConnectivity(cs, tc.IPFamily, ns.Name, testServiceName, []string{})
        defer func() {
            deleteSvcErr := utils.DeleteService(cs, ns.Name, testServiceName)
            Expect(deleteSvcErr).NotTo(HaveOccurred())
        }()
        Expect(err).NotTo(HaveOccurred())
        Expect(len(publicIPs)).NotTo(BeZero())
        publicIP := publicIPs[0]

        By("Checking the initial node number in the LB backend pool")
        lb := getAzureLoadBalancerFromPIP(tc, publicIP, tc.GetResourceGroup(), "")
-       lbBackendPoolIPConfigsCount := 0
+       // Here we use BackendPool IP instead of IP config because this works for both NIC based LB and IP based LB.
+       lbBackendPoolIPCount := 0
+       lbBackendPoolIPs := (*lb.BackendAddressPools)[getLBBackendPoolIndex(lb)].LoadBalancerBackendAddresses
+       Expect(lbBackendPoolIPs).NotTo(BeNil())
        if utils.IsAutoscalingAKSCluster() {
-           for _, ipconfig := range *(*lb.BackendAddressPools)[getLBBackendPoolIndex(lb)].BackendIPConfigurations {
-               if !strings.Contains(*ipconfig.ID, utils.SystemPool) {
-                   lbBackendPoolIPConfigsCount++
+           for _, ip := range *lbBackendPoolIPs {
+               Expect(ip.LoadBalancerBackendAddressPropertiesFormat).NotTo(BeNil())
+               Expect(ip.LoadBalancerBackendAddressPropertiesFormat.NetworkInterfaceIPConfiguration).NotTo(BeNil())
+               ipConfigID := pointer.StringDeref(ip.LoadBalancerBackendAddressPropertiesFormat.NetworkInterfaceIPConfiguration.ID, "")
+               if !strings.Contains(ipConfigID, utils.SystemPool) {
+                   lbBackendPoolIPCount++
                }
            }
        } else {
-           lbBackendPoolIPConfigsCount = len(*(*lb.BackendAddressPools)[getLBBackendPoolIndex(lb)].BackendIPConfigurations)
+           lbBackendPoolIPCount = len(*lbBackendPoolIPs)
        }
        nodes, err = utils.GetAgentNodes(cs)
        Expect(err).NotTo(HaveOccurred())
-       Expect(lbBackendPoolIPConfigsCount).To(Equal(len(nodes)))
+       Expect(lbBackendPoolIPCount).To(Equal(len(nodes)))
+       utils.Logf("Initial node number in the LB backend pool is %d", lbBackendPoolIPCount)
+       nodeToLabel := nodes[0]

-       By("Labeling node")
-       node, err := utils.LabelNode(cs, &nodes[0], label, false)
+       By(fmt.Sprintf("Labeling node %q", nodeToLabel.Name))
+       node, err := utils.LabelNode(cs, &nodeToLabel, label, false)
+       defer func() {
+           node, err = utils.GetNode(cs, node.Name)
+           Expect(err).NotTo(HaveOccurred())
+           _, err = utils.LabelNode(cs, node, label, true)
+           Expect(err).NotTo(HaveOccurred())
+       }()
        Expect(err).NotTo(HaveOccurred())
+       addDummyAnnotationWithServiceName(cs, ns.Name, testServiceName)
+       var expectedCount int
+       if len(nodes) == 1 {
+           expectedCount = 1
+       } else {
+           expectedCount = len(nodes) - 1
+       }

+       err = waitForNodesInLBBackendPool(tc, publicIP, expectedCount)
        Expect(err).NotTo(HaveOccurred())

-       By("Unlabeling node")
+       By(fmt.Sprintf("Unlabeling node %q", nodeToLabel.Name))
        node, err = utils.GetNode(cs, node.Name)
        Expect(err).NotTo(HaveOccurred())
        _, err = utils.LabelNode(cs, node, label, true)
        Expect(err).NotTo(HaveOccurred())
+       addDummyAnnotationWithServiceName(cs, ns.Name, testServiceName)
        err = waitForNodesInLBBackendPool(tc, publicIP, len(nodes))
        Expect(err).NotTo(HaveOccurred())
    })
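
The test now counts LoadBalancerBackendAddresses instead of BackendIPConfigurations so the same check works for both NIC-based and IP-based load balancers. A sketch of the counting rule on autoscaling AKS clusters, where system-pool addresses are ignored — plain strings stand in for the SDK objects, and the marker value is utils.SystemPool in the real test:

package main

import (
    "fmt"
    "strings"
)

// countUserPoolIPs mirrors the test's counting logic: backend addresses whose
// NIC IP configuration ID contains the system-pool marker are excluded.
func countUserPoolIPs(ipConfigIDs []string, systemPoolMarker string) int {
    count := 0
    for _, id := range ipConfigIDs {
        if !strings.Contains(id, systemPoolMarker) {
            count++
        }
    }
    return count
}

func main() {
    ids := []string{
        ".../nodepool1-vmss/ipConfigurations/ipconfig1",
        ".../aks-system-vmss/ipConfigurations/ipconfig1",
    }
    fmt.Println(countUserPoolIPs(ids, "system")) // 1
}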
@@ -1019,18 +1037,37 @@ var _ = Describe("EnsureLoadBalancer should not update any resources when servic
    })
})

-func updateServiceAndCompareEtags(tc *utils.AzureTestClient, cs clientset.Interface, ns *v1.Namespace, service *v1.Service, ip string, isInternal bool) {
-   utils.Logf("Retrieving etags from resources")
-   lbEtag, nsgEtag, pipEtag := getResourceEtags(tc, ip, cloudprovider.DefaultLoadBalancerName(service), isInternal)
+func addDummyAnnotationWithServiceName(cs clientset.Interface, namespace string, serviceName string) {
+   service, err := cs.CoreV1().Services(namespace).Get(context.TODO(), serviceName, metav1.GetOptions{})
+   Expect(err).NotTo(HaveOccurred())
+   addDummyAnnotationWithService(cs, namespace, service)
+}

-   utils.Logf("Adding a dummy annotation to trigger a second service reconciliation")
+func addDummyAnnotationWithService(cs clientset.Interface, namespace string, service *v1.Service) {
+   utils.Logf("Adding a dummy annotation to trigger Service reconciliation")
+   Expect(service).NotTo(BeNil())
    annotation := service.GetAnnotations()
-   annotation["dummy-annotation"] = "dummy"
+   if annotation == nil {
+       annotation = make(map[string]string)
+   }
+   // e2e test should not have 100+ dummy annotations.
+   for i := 0; i < 100; i++ {
+       if _, ok := annotation["dummy-annotation"+strconv.Itoa(i)]; !ok {
+           annotation["dummy-annotation"+strconv.Itoa(i)] = "dummy"
+           break
+       }
+   }
    service = updateServiceAnnotation(service, annotation)
-   utils.Logf("service's annotations: %v", annotation)
-   _, err := cs.CoreV1().Services(ns.Name).Update(context.TODO(), service, metav1.UpdateOptions{})
+   utils.Logf("Service's annotations: %v", annotation)
+   _, err := cs.CoreV1().Services(namespace).Update(context.TODO(), service, metav1.UpdateOptions{})
    Expect(err).NotTo(HaveOccurred())
+}

+func updateServiceAndCompareEtags(tc *utils.AzureTestClient, cs clientset.Interface, ns *v1.Namespace, service *v1.Service, ip string, isInternal bool) {
+   utils.Logf("Retrieving etags from resources")
+   lbEtag, nsgEtag, pipEtag := getResourceEtags(tc, ip, cloudprovider.DefaultLoadBalancerName(service), isInternal)

+   addDummyAnnotationWithService(cs, ns.Name, service)
    ips, err := utils.WaitServiceExposureAndValidateConnectivity(cs, tc.IPFamily, ns.Name, testServiceName, []string{})
    Expect(err).NotTo(HaveOccurred())
    Expect(len(ips)).NotTo(BeZero())
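
Note why the helper picks a numbered key: addDummyAnnotationWithService chooses a fresh dummy-annotation<N> on each call, because re-setting the same key to the same value would leave the Service unchanged on the second update and never trigger another reconciliation. A sketch of that key selection:

package main

import (
    "fmt"
    "strconv"
)

// nextDummyAnnotationKey mirrors the helper above: return the first unused
// dummy-annotation<N> key so every invocation actually mutates the Service.
func nextDummyAnnotationKey(annotations map[string]string) string {
    for i := 0; i < 100; i++ {
        key := "dummy-annotation" + strconv.Itoa(i)
        if _, ok := annotations[key]; !ok {
            return key
        }
    }
    return "" // 100+ dummy annotations is not expected in an e2e run
}

func main() {
    a := map[string]string{"dummy-annotation0": "dummy"}
    fmt.Println(nextDummyAnnotationKey(a)) // dummy-annotation1
}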
Expand Down Expand Up @@ -1133,23 +1170,30 @@ func getAzureInternalLoadBalancerFromPrivateIP(tc *utils.AzureTestClient, ip, lb
func waitForNodesInLBBackendPool(tc *utils.AzureTestClient, ip string, expectedNum int) error {
return wait.PollImmediate(10*time.Second, 10*time.Minute, func() (done bool, err error) {
lb := getAzureLoadBalancerFromPIP(tc, ip, tc.GetResourceGroup(), "")
lbBackendPoolIPConfigs := (*lb.BackendAddressPools)[getLBBackendPoolIndex(lb)].BackendIPConfigurations
ipConfigNum := 0
if lbBackendPoolIPConfigs != nil {
lbBackendPoolIPs := (*lb.BackendAddressPools)[getLBBackendPoolIndex(lb)].LoadBalancerBackendAddresses
ipNum := 0
if lbBackendPoolIPs != nil {
if utils.IsAutoscalingAKSCluster() {
for _, ipconfig := range *lbBackendPoolIPConfigs {
if !strings.Contains(*ipconfig.ID, utils.SystemPool) {
ipConfigNum++
// Autoscaling tests don't include IP based LB.
for _, ip := range *lbBackendPoolIPs {
if ip.LoadBalancerBackendAddressPropertiesFormat == nil ||
ip.LoadBalancerBackendAddressPropertiesFormat.NetworkInterfaceIPConfiguration == nil {
return false, fmt.Errorf("LB backendPool address's NIC IP config ID is nil")
}
ipConfigID := pointer.StringDeref(ip.LoadBalancerBackendAddressPropertiesFormat.NetworkInterfaceIPConfiguration.ID, "")
if !strings.Contains(ipConfigID, utils.SystemPool) {
ipNum++
}
}
} else {
ipConfigNum = len(*lbBackendPoolIPConfigs)
ipNum = len(*lbBackendPoolIPs)
}
}
if expectedNum == ipConfigNum {
if ipNum == expectedNum {
utils.Logf("Number of IPs matches expected number %d. Success", expectedNum)
return true, nil
}
utils.Logf("Number of IP configs: %d in the LB backend pool, expected %d, will retry soon", ipConfigNum, expectedNum)
utils.Logf("Number of IPs: %d in the LB backend pool, expected %d, will retry soon", ipNum, expectedNum)
return false, nil
})
}
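
waitForNodesInLBBackendPool wraps the count in wait.PollImmediate with a 10-second interval and 10-minute timeout, retrying until the backend pool converges. A dependency-free sketch of the same retry pattern:

package main

import (
    "errors"
    "fmt"
    "time"
)

// pollUntil mirrors the retry pattern of waitForNodesInLBBackendPool:
// re-check a condition on an interval until it succeeds, it errors, or the
// timeout elapses. The real helper uses k8s.io/apimachinery's wait.PollImmediate.
func pollUntil(interval, timeout time.Duration, cond func() (bool, error)) error {
    deadline := time.Now().Add(timeout)
    for {
        done, err := cond()
        if err != nil || done {
            return err
        }
        if time.Now().After(deadline) {
            return errors.New("timed out waiting for the condition")
        }
        time.Sleep(interval)
    }
}

func main() {
    observed := 0
    err := pollUntil(10*time.Millisecond, time.Second, func() (bool, error) {
        observed++ // stand-in for re-reading the LB backend pool count
        return observed == 3, nil
    })
    fmt.Println(observed, err) // 3 <nil>
}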
tests/e2e/utils/azure_auth.go — 9 changes: 0 additions & 9 deletions
@@ -39,15 +39,6 @@ const (
    ClusterLocationEnv = "AZURE_LOCATION"
    ClusterEnvironment = "AZURE_ENVIRONMENT"
    LoadBalancerSkuEnv = "AZURE_LOADBALANCER_SKU"
-   // If "TEST_CCM" is true, the test is running on a CAPZ cluster.
-   CAPZTestCCM = "TEST_CCM"
-   // If "E2E_ON_AKS_CLUSTER" is true, the test is running on a AKS cluster.
-   AKSTestCCM = "E2E_ON_AKS_CLUSTER"
-   AKSClusterType = "CLUSTER_TYPE"
-   // If "INGEST_TEST_RESULT" is true, the test result needs ingestion to kusto
-   IngestTestResult = "INGEST_TEST_RESULT"
-
-   TrueValue = "true"
)

// AzureAuthConfig holds auth related part of cloud config
tests/e2e/utils/consts.go — 12 changes: 12 additions & 0 deletions
@@ -37,4 +37,16 @@ const (
    TestSuiteLabelLB = "LB"
    TestSuiteLabelMultiPorts = "Multi-Ports"
    TestSuiteLabelNSG = "NSG"
+
+   // If "TEST_CCM" is true, the test is running on a CAPZ cluster.
+   CAPZTestCCM = "TEST_CCM"
+   // If "E2E_ON_AKS_CLUSTER" is true, the test is running on an AKS cluster.
+   AKSTestCCM = "E2E_ON_AKS_CLUSTER"
+   AKSClusterType = "CLUSTER_TYPE"
+   // If "INGEST_TEST_RESULT" is true, the test result needs ingestion to kusto.
+   IngestTestResult = "INGEST_TEST_RESULT"
+   // LB backendpool config type, may be nodeIP.
+   LBBackendPoolConfigType = "LB_BACKEND_POOL_CONFIG_TYPE"
+
+   TrueValue = "true"
)
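
These environment-variable names are read by helpers elsewhere in the utils package; moving them out of azure_auth.go makes them reachable from any test utility. A sketch of how one such gate consumes them, mirroring the IsAutoscalingAKSCluster helper shown in the next file:

package main

import (
    "fmt"
    "os"
    "strings"
)

// isAutoscalingAKS reads the same variables named by AKSTestCCM and
// AKSClusterType ("E2E_ON_AKS_CLUSTER", "CLUSTER_TYPE").
func isAutoscalingAKS() bool {
    return os.Getenv("E2E_ON_AKS_CLUSTER") != "" &&
        strings.Contains(os.Getenv("CLUSTER_TYPE"), "autoscaling")
}

func main() {
    fmt.Println(isAutoscalingAKS()) // false unless both variables are set
}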
tests/e2e/utils/node_utils.go — 19 changes: 16 additions & 3 deletions
@@ -292,7 +292,16 @@ func IsAutoscalingAKSCluster() bool {
    return os.Getenv(AKSTestCCM) != "" && strings.Contains(os.Getenv(AKSClusterType), "autoscaling")
}

+// Before using the node config to update Node, some fields need to be cleaned.
+func cleanNodeConfigBeforeUpdate(node *v1.Node) {
+   node.CreationTimestamp = metav1.Time{}
+   node.ResourceVersion = ""
+   node.SetSelfLink("")
+   node.SetUID("")
+}
+
func LabelNode(cs clientset.Interface, node *v1.Node, label string, isDelete bool) (*v1.Node, error) {
+   cleanNodeConfigBeforeUpdate(node)
    if _, ok := node.Labels[label]; ok {
        if isDelete {
            delete(node.Labels, label)
@@ -302,9 +311,13 @@ func LabelNode(cs clientset.Interface, node *v1.Node, label string, isDelete boo
        Logf("Found label %s on node %s, do nothing", label, node.Name)
        return node, nil
    }
-   node.Labels[label] = TrueValue
-   node, err := cs.CoreV1().Nodes().Update(context.Background(), node, metav1.UpdateOptions{})
-   return node, err
+   if !isDelete {
+       node.Labels[label] = TrueValue
+       node, err := cs.CoreV1().Nodes().Update(context.Background(), node, metav1.UpdateOptions{})
+       return node, err
+   }
+   Logf("No such label %s on node %s, do nothing", label, node.Name)
+   return node, nil
}

func GetNodepoolNodeMap(nodes *[]v1.Node) map[string][]string {
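
Two things changed in LabelNode: it now clears server-populated fields (creationTimestamp, resourceVersion, selfLink, UID) via cleanNodeConfigBeforeUpdate before issuing the update, and it no longer falls through to re-adding a label when asked to delete one that is absent — previously that path set the label and updated the node anyway. A sketch of the four resulting cases:

package main

import "fmt"

// labelAction sketches LabelNode's behavior after the fix. The old code
// reached "add label, update node" even when isDelete was true.
func labelAction(hasLabel, isDelete bool) string {
    switch {
    case hasLabel && isDelete:
        return "remove label, update node"
    case hasLabel:
        return "label already present, do nothing"
    case !isDelete:
        return "add label, update node"
    default:
        return "label absent, nothing to delete"
    }
}

func main() {
    fmt.Println(labelAction(false, true)) // label absent, nothing to delete
}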