Merge pull request #4217 from lzhecheng/print-backendIPConfigIDs-exclude-from-external-load-balancers

Fix standalone VM EnsureBackendPoolDeleted
k8s-ci-robot committed Jul 7, 2023
2 parents 8647594 + 3d1b7ca commit c48a4a7
Showing 8 changed files with 116 additions and 52 deletions.
5 changes: 4 additions & 1 deletion pkg/provider/azure.go
@@ -1229,9 +1229,11 @@ func (az *Cloud) updateNodeCaches(prevNode, newNode *v1.Node) {
switch {
case !isNodeManagedByCloudProvider:
az.excludeLoadBalancerNodes.Insert(newNode.ObjectMeta.Name)
klog.V(6).Infof("excluding Node %q from LoadBalancer because it is not managed by cloud provider", newNode.ObjectMeta.Name)

case hasExcludeBalancerLabel:
az.excludeLoadBalancerNodes.Insert(newNode.ObjectMeta.Name)
klog.V(6).Infof("excluding Node %q from LoadBalancer because it has exclude-from-external-load-balancers label", newNode.ObjectMeta.Name)

case !isNodeReady(newNode) && nodemanager.GetCloudTaint(newNode.Spec.Taints) == nil:
// If not in ready state and not a newly created node, add to excludeLoadBalancerNodes cache.
@@ -1240,6 +1242,7 @@ func (az *Cloud) updateNodeCaches(prevNode, newNode *v1.Node) {
// VMSS API calls and not provoke VMScaleSetActiveModelsCountLimitReached.
// (https://github.com/kubernetes-sigs/cloud-provider-azure/issues/851)
az.excludeLoadBalancerNodes.Insert(newNode.ObjectMeta.Name)
klog.V(6).Infof("excluding Node %q from LoadBalancer because it is not in ready state or a newly created one", newNode.ObjectMeta.Name)

default:
// Nodes not falling into the three cases above are valid backends and
@@ -1253,7 +1256,7 @@ func (az *Cloud) updateNodeCaches(prevNode, newNode *v1.Node) {
az.nodePrivateIPs[newNode.Name] = sets.New[string]()
}

klog.V(4).Infof("adding IP address %s of the node %s", address, newNode.Name)
klog.V(6).Infof("adding IP address %s of the node %s", address, newNode.Name)
az.nodePrivateIPs[newNode.Name].Insert(address)
}
}
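The three klog.V(6) messages added above map one-to-one to the cases that put a node into az.excludeLoadBalancerNodes. A minimal sketch of that decision, not the provider's actual code; the boolean parameters stand in for its real checks on cloud-provider management, the exclude-from-external-load-balancers label, readiness, and the cloud taint:

// Sketch only: the exclusion decision behind the new log lines.
func excludeFromLoadBalancer(managedByCloudProvider, hasExcludeLabel, ready, hasCloudTaint bool) (bool, string) {
	switch {
	case !managedByCloudProvider:
		return true, "not managed by cloud provider"
	case hasExcludeLabel:
		return true, "has the exclude-from-external-load-balancers label"
	case !ready && !hasCloudTaint:
		// Not ready and not newly created; excluding it avoids extra VMSS API
		// calls (see issue #851 referenced above).
		return true, "not in ready state and not a newly created node"
	default:
		return false, "" // valid backend, kept in the pool
	}
}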
6 changes: 3 additions & 3 deletions pkg/provider/azure_loadbalancer.go
@@ -117,15 +117,15 @@ func getPublicIPDomainNameLabel(service *v1.Service) (string, bool) {
// reconcileService reconcile the LoadBalancer service. It returns LoadBalancerStatus on success.
func (az *Cloud) reconcileService(ctx context.Context, clusterName string, service *v1.Service, nodes []*v1.Node) (*v1.LoadBalancerStatus, error) {
serviceName := getServiceName(service)
resourceBaseName := az.GetLoadBalancerName(context.TODO(), "", service)
klog.V(2).Infof("reconcileService: Start reconciling Service %q with its resource basename %q", serviceName, resourceBaseName)

lb, err := az.reconcileLoadBalancer(clusterName, service, nodes, true /* wantLb */)
if err != nil {
klog.Errorf("reconcileLoadBalancer(%s) failed: %v", serviceName, err)
return nil, err
}

resourceBaseName := az.GetLoadBalancerName(context.TODO(), "", service)
klog.V(2).Infof("reconcileService: Start reconciling Service %q with its resource basename %q", serviceName, resourceBaseName)

lbStatus, lbIPsPrimaryPIPs, fipConfigs, err := az.getServiceLoadBalancerStatus(service, lb)
if err != nil {
klog.Errorf("getServiceLoadBalancerStatus(%s) failed: %v", serviceName, err)
2 changes: 1 addition & 1 deletion pkg/provider/azure_loadbalancer_backendpool.go
@@ -623,7 +623,7 @@ func (bi *backendPoolTypeNodeIP) ReconcileBackendPools(clusterName string, servi
var nodeIPAddressesToBeDeleted []string
for nodeName := range bi.excludeLoadBalancerNodes {
for ip := range bi.nodePrivateIPs[nodeName] {
klog.V(2).Infof("bi.ReconcileBackendPools for service (%s): found unwanted node private IP %s, decoupling it from the LB %s", serviceName, ip, lbName)
klog.V(2).Infof("bi.ReconcileBackendPools for service (%s): found unwanted node private IP %s, decouple it from the LB %s", serviceName, ip, lbName)
nodeIPAddressesToBeDeleted = append(nodeIPAddressesToBeDeleted, ip)
}
}
3 changes: 2 additions & 1 deletion pkg/provider/azure_standard.go
@@ -1149,7 +1149,8 @@ func (as *availabilitySet) EnsureBackendPoolDeleted(service *v1.Service, backend
return false, fmt.Errorf("EnsureBackendPoolDeleted: failed to parse the VMAS ID %s: %w", vmasID, err)
}
// Only remove nodes belonging to specified vmSet to basic LB backends.
if !strings.EqualFold(vmasName, vmSetName) {
// If vmasID is empty, then it is standalone VM.
if vmasID != "" && !strings.EqualFold(vmasName, vmSetName) {
klog.V(2).Infof("EnsureBackendPoolDeleted: skipping the node %s belonging to another vm set %s", nodeName, vmasName)
continue
}
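This guard is the actual fix: a standalone VM belongs to no availability set, so its vmasID is empty and the old comparison against vmSetName made EnsureBackendPoolDeleted skip it. A minimal sketch of the new behaviour, assuming for illustration that the VMAS name is the last segment of the ARM ID (the real code uses its own parsing helper):

package provider // sketch only, not part of the commit

import "strings"

// shouldSkipForVMSet mirrors the check above: a node is skipped only when it belongs to a
// different availability set; an empty vmasID means a standalone VM, which is never skipped.
func shouldSkipForVMSet(vmasID, vmSetName string) bool {
	if vmasID == "" {
		return false // standalone VM: still remove it from the backend pool
	}
	vmasName := vmasID[strings.LastIndex(vmasID, "/")+1:] // illustrative ARM ID parsing
	return !strings.EqualFold(vmasName, vmSetName)
}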
112 changes: 78 additions & 34 deletions tests/e2e/network/ensureloadbalancer.go
@@ -649,61 +649,79 @@ var _ = Describe("Ensure LoadBalancer", Label(utils.TestSuiteLabelLB), func() {
})

It("should support node label `node.kubernetes.io/exclude-from-external-load-balancers`", func() {
if os.Getenv(utils.AKSTestCCM) == "" {
Skip("Skip this test case for non-AKS test")
}

label := "node.kubernetes.io/exclude-from-external-load-balancers"
By("Checking the number of the node pools")
nodes, err := utils.GetAgentNodes(cs)
Expect(err).NotTo(HaveOccurred())
initNodepoolNodeMap := utils.GetNodepoolNodeMap(&nodes)
if len(initNodepoolNodeMap) != 1 {
Skip("single node pool is needed in this scenario")
if os.Getenv(utils.AKSTestCCM) != "" {
// AKS
initNodepoolNodeMap := utils.GetNodepoolNodeMap(&nodes)
if len(initNodepoolNodeMap) != 1 {
Skip("single node pool is needed in this scenario")
}
}

By("Creating a service to trigger the LB reconcile")
service := utils.CreateLoadBalancerServiceManifest(testServiceName, nil, labels, ns.Name, ports)
_, err = cs.CoreV1().Services(ns.Name).Create(context.TODO(), service, metav1.CreateOptions{})
Expect(err).NotTo(HaveOccurred())
publicIPs, err := utils.WaitServiceExposureAndValidateConnectivity(cs, tc.IPFamily, ns.Name, testServiceName, []string{})
defer func() {
deleteSvcErr := utils.DeleteService(cs, ns.Name, testServiceName)
Expect(deleteSvcErr).NotTo(HaveOccurred())
}()
Expect(err).NotTo(HaveOccurred())
Expect(len(publicIPs)).NotTo(BeZero())
publicIP := publicIPs[0]

By("Checking the initial node number in the LB backend pool")
lb := getAzureLoadBalancerFromPIP(tc, publicIP, tc.GetResourceGroup(), "")
lbBackendPoolIPConfigsCount := 0
// Here we use BackendPool IP instead of IP config because this works for both NIC based LB and IP based LB.
lbBackendPoolIPCount := 0
lbBackendPoolIPs := (*lb.BackendAddressPools)[getLBBackendPoolIndex(lb)].LoadBalancerBackendAddresses
Expect(lbBackendPoolIPs).NotTo(BeNil())
if utils.IsAutoscalingAKSCluster() {
for _, ipconfig := range *(*lb.BackendAddressPools)[getLBBackendPoolIndex(lb)].BackendIPConfigurations {
if !strings.Contains(*ipconfig.ID, utils.SystemPool) {
lbBackendPoolIPConfigsCount++
for _, ip := range *lbBackendPoolIPs {
Expect(ip.LoadBalancerBackendAddressPropertiesFormat).NotTo(BeNil())
Expect(ip.LoadBalancerBackendAddressPropertiesFormat.NetworkInterfaceIPConfiguration).NotTo(BeNil())
ipConfigID := pointer.StringDeref(ip.LoadBalancerBackendAddressPropertiesFormat.NetworkInterfaceIPConfiguration.ID, "")
if !strings.Contains(ipConfigID, utils.SystemPool) {
lbBackendPoolIPCount++
}
}
} else {
lbBackendPoolIPConfigsCount = len(*(*lb.BackendAddressPools)[getLBBackendPoolIndex(lb)].BackendIPConfigurations)
lbBackendPoolIPCount = len(*lbBackendPoolIPs)
}
nodes, err = utils.GetAgentNodes(cs)
Expect(err).NotTo(HaveOccurred())
Expect(lbBackendPoolIPConfigsCount).To(Equal(len(nodes)))
Expect(lbBackendPoolIPCount).To(Equal(len(nodes)))
utils.Logf("Initial node number in the LB backend pool is %d", lbBackendPoolIPCount)
nodeToLabel := nodes[0]

By("Labeling node")
node, err := utils.LabelNode(cs, &nodes[0], label, false)
By(fmt.Sprintf("Labeling node %q", nodeToLabel.Name))
node, err := utils.LabelNode(cs, &nodeToLabel, label, false)
defer func() {
node, err = utils.GetNode(cs, node.Name)
Expect(err).NotTo(HaveOccurred())
_, err = utils.LabelNode(cs, node, label, true)
Expect(err).NotTo(HaveOccurred())
}()
Expect(err).NotTo(HaveOccurred())
addDummyAnnotationWithServiceName(cs, ns.Name, testServiceName)
var expectedCount int
if len(nodes) == 1 {
expectedCount = 1
} else {
expectedCount = len(nodes) - 1
}

err = waitForNodesInLBBackendPool(tc, publicIP, expectedCount)
Expect(err).NotTo(HaveOccurred())

By("Unlabeling node")
By(fmt.Sprintf("Unlabeling node %q", nodeToLabel.Name))
node, err = utils.GetNode(cs, node.Name)
Expect(err).NotTo(HaveOccurred())
_, err = utils.LabelNode(cs, node, label, true)
Expect(err).NotTo(HaveOccurred())
addDummyAnnotationWithServiceName(cs, ns.Name, testServiceName)
err = waitForNodesInLBBackendPool(tc, publicIP, len(nodes))
Expect(err).NotTo(HaveOccurred())
})
@@ -1019,18 +1037,37 @@ var _ = Describe("EnsureLoadBalancer should not update any resources when servic
})
})

func updateServiceAndCompareEtags(tc *utils.AzureTestClient, cs clientset.Interface, ns *v1.Namespace, service *v1.Service, ip string, isInternal bool) {
utils.Logf("Retrieving etags from resources")
lbEtag, nsgEtag, pipEtag := getResourceEtags(tc, ip, cloudprovider.DefaultLoadBalancerName(service), isInternal)
func addDummyAnnotationWithServiceName(cs clientset.Interface, namespace string, serviceName string) {
service, err := cs.CoreV1().Services(namespace).Get(context.TODO(), serviceName, metav1.GetOptions{})
Expect(err).NotTo(HaveOccurred())
addDummyAnnotationWithService(cs, namespace, service)
}

utils.Logf("Adding a dummy annotation to trigger a second service reconciliation")
func addDummyAnnotationWithService(cs clientset.Interface, namespace string, service *v1.Service) {
utils.Logf("Adding a dummy annotation to trigger Service reconciliation")
Expect(service).NotTo(BeNil())
annotation := service.GetAnnotations()
annotation["dummy-annotation"] = "dummy"
if annotation == nil {
annotation = make(map[string]string)
}
// e2e test should not have 100+ dummy annotations.
for i := 0; i < 100; i++ {
if _, ok := annotation["dummy-annotation"+strconv.Itoa(i)]; !ok {
annotation["dummy-annotation"+strconv.Itoa(i)] = "dummy"
break
}
}
service = updateServiceAnnotation(service, annotation)
utils.Logf("service's annotations: %v", annotation)
_, err := cs.CoreV1().Services(ns.Name).Update(context.TODO(), service, metav1.UpdateOptions{})
utils.Logf("Service's annotations: %v", annotation)
_, err := cs.CoreV1().Services(namespace).Update(context.TODO(), service, metav1.UpdateOptions{})
Expect(err).NotTo(HaveOccurred())
}

func updateServiceAndCompareEtags(tc *utils.AzureTestClient, cs clientset.Interface, ns *v1.Namespace, service *v1.Service, ip string, isInternal bool) {
utils.Logf("Retrieving etags from resources")
lbEtag, nsgEtag, pipEtag := getResourceEtags(tc, ip, cloudprovider.DefaultLoadBalancerName(service), isInternal)

addDummyAnnotationWithService(cs, ns.Name, service)
ips, err := utils.WaitServiceExposureAndValidateConnectivity(cs, tc.IPFamily, ns.Name, testServiceName, []string{})
Expect(err).NotTo(HaveOccurred())
Expect(len(ips)).NotTo(BeZero())
@@ -1133,23 +1170,30 @@ func getAzureInternalLoadBalancerFromPrivateIP(tc *utils.AzureTestClient, ip, lb
func waitForNodesInLBBackendPool(tc *utils.AzureTestClient, ip string, expectedNum int) error {
return wait.PollImmediate(10*time.Second, 10*time.Minute, func() (done bool, err error) {
lb := getAzureLoadBalancerFromPIP(tc, ip, tc.GetResourceGroup(), "")
lbBackendPoolIPConfigs := (*lb.BackendAddressPools)[getLBBackendPoolIndex(lb)].BackendIPConfigurations
ipConfigNum := 0
if lbBackendPoolIPConfigs != nil {
lbBackendPoolIPs := (*lb.BackendAddressPools)[getLBBackendPoolIndex(lb)].LoadBalancerBackendAddresses
ipNum := 0
if lbBackendPoolIPs != nil {
if utils.IsAutoscalingAKSCluster() {
for _, ipconfig := range *lbBackendPoolIPConfigs {
if !strings.Contains(*ipconfig.ID, utils.SystemPool) {
ipConfigNum++
// Autoscaling tests don't include IP based LB.
for _, ip := range *lbBackendPoolIPs {
if ip.LoadBalancerBackendAddressPropertiesFormat == nil ||
ip.LoadBalancerBackendAddressPropertiesFormat.NetworkInterfaceIPConfiguration == nil {
return false, fmt.Errorf("LB backendPool address's NIC IP config ID is nil")
}
ipConfigID := pointer.StringDeref(ip.LoadBalancerBackendAddressPropertiesFormat.NetworkInterfaceIPConfiguration.ID, "")
if !strings.Contains(ipConfigID, utils.SystemPool) {
ipNum++
}
}
} else {
ipConfigNum = len(*lbBackendPoolIPConfigs)
ipNum = len(*lbBackendPoolIPs)
}
}
if expectedNum == ipConfigNum {
if ipNum == expectedNum {
utils.Logf("Number of IPs matches expected number %d. Success", expectedNum)
return true, nil
}
utils.Logf("Number of IP configs: %d in the LB backend pool, expected %d, will retry soon", ipConfigNum, expectedNum)
utils.Logf("Number of IPs: %d in the LB backend pool, expected %d, will retry soon", ipNum, expectedNum)
return false, nil
})
}
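Both the test above and this helper now count LoadBalancerBackendAddresses instead of BackendIPConfigurations, so one code path covers NIC-based and IP-based backend pools. A self-contained sketch of that counting, with minimal stand-in structs rather than the real SDK types:

package main

import (
	"fmt"
	"strings"
)

// Stand-ins for the SDK shapes referenced above (illustrative only).
type ipConfigRef struct{ ID *string }
type backendAddressProps struct{ NetworkInterfaceIPConfiguration *ipConfigRef }
type backendAddress struct{ Properties *backendAddressProps }

// countUserPoolIPs counts backend addresses whose NIC IP configuration is not part of
// the AKS system pool, mirroring the filtering used for autoscaling clusters.
func countUserPoolIPs(addrs []backendAddress, systemPoolMarker string) (int, error) {
	n := 0
	for _, a := range addrs {
		if a.Properties == nil || a.Properties.NetworkInterfaceIPConfiguration == nil {
			return 0, fmt.Errorf("backend address has no NIC IP configuration reference")
		}
		if !strings.Contains(*a.Properties.NetworkInterfaceIPConfiguration.ID, systemPoolMarker) {
			n++
		}
	}
	return n, nil
}

func main() {
	// Hypothetical IP configuration IDs, only for demonstration.
	sys := "/subscriptions/.../networkInterfaces/aks-systempool-123-nic/ipConfigurations/ipconfig1"
	usr := "/subscriptions/.../networkInterfaces/aks-userpool-456-nic/ipConfigurations/ipconfig1"
	n, err := countUserPoolIPs([]backendAddress{
		{Properties: &backendAddressProps{NetworkInterfaceIPConfiguration: &ipConfigRef{ID: &sys}}},
		{Properties: &backendAddressProps{NetworkInterfaceIPConfiguration: &ipConfigRef{ID: &usr}}},
	}, "systempool")
	fmt.Println(n, err) // 1 <nil>: the system-pool address is filtered out
}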
9 changes: 0 additions & 9 deletions tests/e2e/utils/azure_auth.go
@@ -39,15 +39,6 @@ const (
ClusterLocationEnv = "AZURE_LOCATION"
ClusterEnvironment = "AZURE_ENVIRONMENT"
LoadBalancerSkuEnv = "AZURE_LOADBALANCER_SKU"
// If "TEST_CCM" is true, the test is running on a CAPZ cluster.
CAPZTestCCM = "TEST_CCM"
// If "E2E_ON_AKS_CLUSTER" is true, the test is running on a AKS cluster.
AKSTestCCM = "E2E_ON_AKS_CLUSTER"
AKSClusterType = "CLUSTER_TYPE"
// If "INGEST_TEST_RESULT" is true, the test result needs ingestion to kusto
IngestTestResult = "INGEST_TEST_RESULT"

TrueValue = "true"
)

// AzureAuthConfig holds auth related part of cloud config
12 changes: 12 additions & 0 deletions tests/e2e/utils/consts.go
@@ -37,4 +37,16 @@ const (
TestSuiteLabelLB = "LB"
TestSuiteLabelMultiPorts = "Multi-Ports"
TestSuiteLabelNSG = "NSG"

// If "TEST_CCM" is true, the test is running on a CAPZ cluster.
CAPZTestCCM = "TEST_CCM"
// If "E2E_ON_AKS_CLUSTER" is true, the test is running on a AKS cluster.
AKSTestCCM = "E2E_ON_AKS_CLUSTER"
AKSClusterType = "CLUSTER_TYPE"
// If "INGEST_TEST_RESULT" is true, the test result needs ingestion to kusto
IngestTestResult = "INGEST_TEST_RESULT"
// LB backendpool config type, may be nodeIP
LBBackendPoolConfigType = "LB_BACKEND_POOL_CONFIG_TYPE"

TrueValue = "true"
)
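These environment-variable names are read via os.Getenv in the test helpers, for example the AKS check used in the LoadBalancer tests. The second check below, on the new backend-pool type constant, is only an assumption about how a "nodeIP" value might be consumed; it is not taken from this commit:

// Sketch only; assumes "os" and "strings" are imported.
if os.Getenv(utils.AKSTestCCM) != "" {
	// running against an AKS cluster
}
if strings.EqualFold(os.Getenv(utils.LBBackendPoolConfigType), "nodeIP") {
	// IP-based (node IP) LB backend pool configured (hypothetical check)
}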
19 changes: 16 additions & 3 deletions tests/e2e/utils/node_utils.go
@@ -292,7 +292,16 @@ func IsAutoscalingAKSCluster() bool {
return os.Getenv(AKSTestCCM) != "" && strings.Contains(os.Getenv(AKSClusterType), "autoscaling")
}

// Before using the node config to update Node, some fields need to be cleaned.
func cleanNodeConfigBeforeUpdate(node *v1.Node) {
node.CreationTimestamp = metav1.Time{}
node.ResourceVersion = ""
node.SetSelfLink("")
node.SetUID("")
}

func LabelNode(cs clientset.Interface, node *v1.Node, label string, isDelete bool) (*v1.Node, error) {
cleanNodeConfigBeforeUpdate(node)
if _, ok := node.Labels[label]; ok {
if isDelete {
delete(node.Labels, label)
@@ -302,9 +311,13 @@ func LabelNode(cs clientset.Interface, node *v1.Node, label string, isDelete boo
Logf("Found label %s on node %s, do nothing", label, node.Name)
return node, nil
}
node.Labels[label] = TrueValue
node, err := cs.CoreV1().Nodes().Update(context.Background(), node, metav1.UpdateOptions{})
return node, err
if !isDelete {
node.Labels[label] = TrueValue
node, err := cs.CoreV1().Nodes().Update(context.Background(), node, metav1.UpdateOptions{})
return node, err
}
Logf("No such label %s on node %s, do nothing", label, node.Name)
return node, nil
}

func GetNodepoolNodeMap(nodes *[]v1.Node) map[string][]string {
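With this change LabelNode is idempotent in both directions. A rough usage sketch (assumes a clientset cs and a *v1.Node node already fetched; error handling elided):

label := "node.kubernetes.io/exclude-from-external-load-balancers"
node, _ = LabelNode(cs, node, label, false) // absent: adds the label and updates the Node
node, _ = LabelNode(cs, node, label, false) // present: logs "Found label ..." and returns unchanged
node, _ = LabelNode(cs, node, label, true)  // present: deletes the label and updates the Node
node, _ = LabelNode(cs, node, label, true)  // absent: logs "No such label ..." and does nothing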
