Skip to content

Commit

Permalink
Merge pull request #794 from feiskyer/fix-err-logs
Browse files Browse the repository at this point in the history
fix: consolidate logs for instance not found error
  • Loading branch information
k8s-ci-robot committed Sep 6, 2021
2 parents f1bbd96 + 3fac30e commit 75e477f
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 14 deletions.
14 changes: 6 additions & 8 deletions pkg/provider/azure_loadbalancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package provider
import (
"context"
"encoding/json"
"errors"
"fmt"
"math"
"reflect"
Expand Down Expand Up @@ -227,7 +228,7 @@ func (az *Cloud) cleanupVMSetFromBackendPoolByCondition(slb *network.LoadBalance
ipConf := (*bp.BackendIPConfigurations)[i]
ipConfigID := to.String(ipConf.ID)
_, vmSetName, err := az.VMSet.GetNodeNameByIPConfigurationID(ipConfigID)
if err != nil {
if err != nil && !errors.Is(err, cloudprovider.InstanceNotFound) {
return nil, err
}

Expand Down Expand Up @@ -1663,7 +1664,7 @@ func (az *Cloud) reconcileLBProbes(lb *network.LoadBalancer, service *v1.Service
}
}
if dirtyProbes {
probesJSON, _ := json.MarshalIndent(expectedProbes, "", " ")
probesJSON, _ := json.Marshal(expectedProbes)
klog.V(2).Infof("reconcileLoadBalancer for service (%s)(%t): lb probes updated: %s", serviceName, wantLb, string(probesJSON))
lb.Probes = &updatedProbes
}
Expand Down Expand Up @@ -1709,7 +1710,7 @@ func (az *Cloud) reconcileLBRules(lb *network.LoadBalancer, service *v1.Service,
}
}
if dirtyRules {
ruleJSON, _ := json.MarshalIndent(expectedRules, "", " ")
ruleJSON, _ := json.Marshal(expectedRules)
klog.V(2).Infof("reconcileLoadBalancer for service (%s)(%t): lb rules updated: %s", serviceName, wantLb, string(ruleJSON))
lb.LoadBalancingRules = &updatedRules
}
Expand Down Expand Up @@ -1887,13 +1888,10 @@ func (az *Cloud) reconcileBackendPools(clusterName string, service *v1.Service,
for _, ipConf := range *bp.BackendIPConfigurations {
ipConfID := to.String(ipConf.ID)
nodeName, _, err := az.VMSet.GetNodeNameByIPConfigurationID(ipConfID)
if err != nil {
if err != nil && !errors.Is(err, cloudprovider.InstanceNotFound) {
return false, false, err
}
if nodeName == "" {
// VM may under deletion
continue
}

// If a node is not supposed to be included in the LB, it
// would not be in the `nodes` slice. We need to check the nodes that
// have been added to the LB's backendpool, find the unwanted ones and
Expand Down
60 changes: 60 additions & 0 deletions pkg/provider/azure_loadbalancer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (

v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
cloudprovider "k8s.io/cloud-provider"

"sigs.k8s.io/cloud-provider-azure/pkg/azureclients/interfaceclient/mockinterfaceclient"
"sigs.k8s.io/cloud-provider-azure/pkg/azureclients/loadbalancerclient/mockloadbalancerclient"
Expand Down Expand Up @@ -4109,6 +4110,65 @@ func TestCleanupVMSetFromBackendPoolByCondition(t *testing.T) {
assert.Equal(t, expectedLB, *cleanedLB)
}

func TestCleanupVMSetFromBackendPoolForInstanceNotFound(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
cloud := GetTestCloud(ctrl)
cloud.LoadBalancerSku = consts.LoadBalancerSkuStandard
cloud.EnableMultipleStandardLoadBalancers = true
cloud.PrimaryAvailabilitySetName = "agentpool1-availabilitySet-00000000"
clusterName := "testCluster"
service := getTestService("test", v1.ProtocolTCP, nil, false, 80)
lb := buildDefaultTestLB("testCluster", []string{
"/subscriptions/subscription/resourceGroups/rg/providers/Microsoft.Network/networkInterfaces/k8s-agentpool1-00000000-nic-1/ipConfigurations/ipconfig1",
"/subscriptions/subscription/resourceGroups/rg/providers/Microsoft.Network/networkInterfaces/k8s-agentpool2-00000000-nic-1/ipConfigurations/ipconfig1",
})
// existingVMForAS1 := buildDefaultTestVirtualMachine("/subscriptions/subscription/resourceGroups/rg/providers/Microsoft.Compute/availabilitySets/agentpool1-availabilitySet-00000000", []string{"/subscriptions/subscription/resourceGroups/rg/providers/Microsoft.Network/networkInterfaces/k8s-agentpool1-00000000-nic-1"})
existingVMForAS2 := buildDefaultTestVirtualMachine("/subscriptions/subscription/resourceGroups/rg/providers/Microsoft.Compute/availabilitySets/agentpool2-availabilitySet-00000000", []string{"/subscriptions/subscription/resourceGroups/rg/providers/Microsoft.Network/networkInterfaces/k8s-agentpool2-00000000-nic-1"})
existingNICForAS1 := buildDefaultTestInterface(true, []string{"/subscriptions/sub/resourceGroups/gh/providers/Microsoft.Network/loadBalancers/testCluster/backendAddressPools/testCluster"})
existingNICForAS1.VirtualMachine = &network.SubResource{
ID: to.StringPtr("/subscriptions/subscription/resourceGroups/rg/providers/Microsoft.Compute/virtualMachines/k8s-agentpool1-00000000-1"),
}
existingNICForAS2 := buildDefaultTestInterface(true, []string{"/subscriptions/sub/resourceGroups/gh/providers/Microsoft.Network/loadBalancers/testCluster/backendAddressPools/testCluster"})
existingNICForAS2.VirtualMachine = &network.SubResource{
ID: to.StringPtr("/subscriptions/subscription/resourceGroups/rg/providers/Microsoft.Compute/virtualMachines/k8s-agentpool2-00000000-1"),
}
mockVMClient := mockvmclient.NewMockInterface(ctrl)
mockVMClient.EXPECT().Get(gomock.Any(), cloud.ResourceGroup, "k8s-agentpool1-00000000-1", gomock.Any()).Return(compute.VirtualMachine{}, &retry.Error{RawError: cloudprovider.InstanceNotFound})
mockVMClient.EXPECT().Get(gomock.Any(), cloud.ResourceGroup, "k8s-agentpool2-00000000-1", gomock.Any()).Return(existingVMForAS2, nil)
cloud.VirtualMachinesClient = mockVMClient
mockNICClient := mockinterfaceclient.NewMockInterface(ctrl)
mockNICClient.EXPECT().Get(gomock.Any(), "rg", "k8s-agentpool1-00000000-nic-1", gomock.Any()).Return(existingNICForAS1, nil)
mockNICClient.EXPECT().Get(gomock.Any(), "rg", "k8s-agentpool2-00000000-nic-1", gomock.Any()).Return(existingNICForAS2, nil).Times(3)
mockNICClient.EXPECT().CreateOrUpdate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil)
cloud.InterfacesClient = mockNICClient

expectedLB := network.LoadBalancer{
Name: to.StringPtr("testCluster"),
LoadBalancerPropertiesFormat: &network.LoadBalancerPropertiesFormat{
BackendAddressPools: &[]network.BackendAddressPool{
{
Name: to.StringPtr("testCluster"),
BackendAddressPoolPropertiesFormat: &network.BackendAddressPoolPropertiesFormat{
BackendIPConfigurations: &[]network.InterfaceIPConfiguration{
{
ID: to.StringPtr("/subscriptions/subscription/resourceGroups/rg/providers/Microsoft.Network/networkInterfaces/k8s-agentpool1-00000000-nic-1/ipConfigurations/ipconfig1"),
},
},
},
},
},
},
}

shouldRemoveVMSetFromSLB := func(vmSetName string) bool {
return !strings.EqualFold(vmSetName, cloud.VMSet.GetPrimaryVMSetName()) && vmSetName != ""
}
cleanedLB, err := cloud.cleanupVMSetFromBackendPoolByCondition(&lb, &service, clusterName, shouldRemoveVMSetFromSLB)
assert.NoError(t, err)
assert.Equal(t, expectedLB, *cleanedLB)
}

func buildDefaultTestLB(name string, backendIPConfigs []string) network.LoadBalancer {
expectedLB := network.LoadBalancer{
Name: to.StringPtr(name),
Expand Down
13 changes: 9 additions & 4 deletions pkg/provider/azure_standard.go
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,7 @@ func (as *availabilitySet) GetInstanceIDByNodeName(name string) (string, error)

machine, err = as.getVirtualMachine(types.NodeName(name), azcache.CacheReadTypeUnsafe)
if errors.Is(err, cloudprovider.InstanceNotFound) {
klog.Warningf("Unable to find node %s: %v", name, cloudprovider.InstanceNotFound)
return "", cloudprovider.InstanceNotFound
}
if err != nil {
Expand Down Expand Up @@ -1004,11 +1005,14 @@ func (as *availabilitySet) EnsureBackendPoolDeleted(service *v1.Service, backend
for i := range ipConfigurationIDs {
ipConfigurationID := ipConfigurationIDs[i]
nodeName, _, err := as.GetNodeNameByIPConfigurationID(ipConfigurationID)
if err != nil {
if err != nil && !errors.Is(err, cloudprovider.InstanceNotFound) {
klog.Errorf("Failed to GetNodeNameByIPConfigurationID(%s): %v", ipConfigurationID, err)
allErrs = append(allErrs, err)
continue
}
if nodeName == "" {
continue
}

vmName := mapNodeNameToVMName(types.NodeName(nodeName))
nic, vmasID, err := as.getPrimaryInterfaceWithVMSet(vmName, vmSetName)
Expand Down Expand Up @@ -1139,7 +1143,8 @@ func (as *availabilitySet) GetNodeNameByIPConfigurationID(ipConfigurationID stri

vm, err := as.getVirtualMachine(types.NodeName(vmName), azcache.CacheReadTypeDefault)
if err != nil {
return "", "", fmt.Errorf("cannot get the virtual machine by node name %s", vmName)
klog.Errorf("Unable to get the virtual machine by node name %s: %v", vmName, err)
return "", "", err
}
asID := ""
if vm.VirtualMachineProperties != nil && vm.AvailabilitySet != nil {
Expand All @@ -1151,7 +1156,7 @@ func (as *availabilitySet) GetNodeNameByIPConfigurationID(ipConfigurationID stri

asName, err := getAvailabilitySetNameByID(asID)
if err != nil {
return "", "", fmt.Errorf("cannot get the availability set name by the availability set ID %s", asID)
return "", "", fmt.Errorf("cannot get the availability set name by the availability set ID %s: %v", asID, err)
}
return vmName, strings.ToLower(asName), nil
}
Expand Down Expand Up @@ -1198,7 +1203,7 @@ func (as *availabilitySet) getAvailabilitySetByNodeName(nodeName string, crt azc
}

if result == nil {
klog.Warningf("failed to find the vmas of node %s", nodeName)
klog.Warningf("Unable to find node %s: %v", nodeName, cloudprovider.InstanceNotFound)
return nil, cloudprovider.InstanceNotFound
}

Expand Down
10 changes: 8 additions & 2 deletions pkg/provider/azure_vmss.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ func (ss *ScaleSet) getVmssVMByNodeIdentity(node *nodeIdentity, crt azcache.Azur
}

if !found || vm == nil {
klog.Warningf("Unable to find node %s: %v", node.nodeName, cloudprovider.InstanceNotFound)
return "", "", nil, cloudprovider.InstanceNotFound
}
return vmssName, instanceID, vm, nil
Expand Down Expand Up @@ -342,6 +343,7 @@ func (ss *ScaleSet) GetInstanceIDByNodeName(name string) (string, error) {

_, _, vm, err := ss.getVmssVM(name, azcache.CacheReadTypeUnsafe)
if err != nil {
klog.Errorf("Unable to find node %s: %v", name, err)
return "", err
}

Expand Down Expand Up @@ -389,6 +391,7 @@ func (ss *ScaleSet) GetNodeNameByProviderID(providerID string) (types.NodeName,

vm, err := ss.getVmssVMByInstanceID(resourceGroup, scaleSetName, instanceID, azcache.CacheReadTypeUnsafe)
if err != nil {
klog.Errorf("Unable to find node by providerID %s: %v", providerID, err)
return "", err
}

Expand Down Expand Up @@ -709,6 +712,7 @@ func (ss *ScaleSet) getNodeIdentityByNodeName(nodeName string, crt azcache.Azure
return nil, err
}
if node.vmssName == "" {
klog.Warningf("Unable to find node %s: %v", nodeName, cloudprovider.InstanceNotFound)
return nil, cloudprovider.InstanceNotFound
}
return node, nil
Expand All @@ -721,7 +725,7 @@ func (ss *ScaleSet) listScaleSetVMs(scaleSetName, resourceGroup string) ([]compu

allVMs, rerr := ss.VirtualMachineScaleSetVMsClient.List(ctx, resourceGroup, scaleSetName, string(compute.InstanceView))
if rerr != nil {
klog.Errorf("VirtualMachineScaleSetVMsClient.List failed: %v", rerr)
klog.Errorf("VirtualMachineScaleSetVMsClient.List(%s, %s) failed: %v", resourceGroup, scaleSetName, rerr)
if rerr.IsNotFound() {
return nil, cloudprovider.InstanceNotFound
}
Expand Down Expand Up @@ -1397,7 +1401,8 @@ func (ss *ScaleSet) GetNodeNameByIPConfigurationID(ipConfigurationID string) (st
if len(matches) != 4 {
klog.V(4).Infof("Can not extract scale set name from ipConfigurationID (%s), assuming it is managed by availability set", ipConfigurationID)
name, rg, err := ss.availabilitySet.GetNodeNameByIPConfigurationID(ipConfigurationID)
if err != nil {
if err != nil && !errors.Is(err, cloudprovider.InstanceNotFound) {
klog.Errorf("Unable to find node by IPConfigurationID %s: %v", ipConfigurationID, err)
return "", "", ErrorNotVmssInstance
}
return name, rg, nil
Expand All @@ -1408,6 +1413,7 @@ func (ss *ScaleSet) GetNodeNameByIPConfigurationID(ipConfigurationID string) (st
instanceID := matches[3]
vm, err := ss.getVmssVMByInstanceID(resourceGroup, scaleSetName, instanceID, azcache.CacheReadTypeUnsafe)
if err != nil {
klog.Errorf("Unable to find node by ipConfigurationID %s: %v", ipConfigurationID, err)
return "", "", err
}

Expand Down
1 change: 1 addition & 0 deletions pkg/provider/azure_wrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ func (az *Cloud) getVirtualMachine(nodeName types.NodeName, crt azcache.AzureCac
}

if cachedVM == nil {
klog.Warningf("Unable to find node %s: %v", nodeName, cloudprovider.InstanceNotFound)
return vm, cloudprovider.InstanceNotFound
}

Expand Down

0 comments on commit 75e477f

Please sign in to comment.