Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[release-1.29] fix: Skip attaching/detaching vmss vm to lb backend pool if the vm is… #5365

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
16 changes: 14 additions & 2 deletions pkg/consts/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,8 @@ const (
// StrRawVersion is the raw version string
StrRawVersion string = "raw"

// VirtualMachineScaleSetsDeallocating indicates VMSS instances are in Deallocating state.
VirtualMachineScaleSetsDeallocating = "Deallocating"
// ProvisionStateDeleting indicates VMSS instances are in Deleting state.
ProvisionStateDeleting = "Deleting"
// VmssMachineIDTemplate is the vmss machine ID template
VmssMachineIDTemplate = "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Compute/virtualMachineScaleSets/%s/virtualMachines/%s"
// VMSetCIDRIPV4TagKey specifies the node ipv4 CIDR mask of the instances on the VMSS or VMAS
Expand All @@ -132,6 +132,8 @@ const (
ProvisioningStateDeleting = "Deleting"
// ProvisioningStateSucceeded is the provisioning state reported once provisioning has succeeded
ProvisioningStateSucceeded = "Succeeded"
// ProvisioningStateUnknown is the unknown provisioning state
ProvisioningStateUnknown = "Unknown"
)

// cache
Expand Down Expand Up @@ -572,3 +574,13 @@ const (
ClusterServiceLoadBalancerHealthProbeDefaultPath = "/healthz"
SharedProbeName = "cluster-service-shared-health-probe"
)

// VM power state values.
//
// A power state is carried in a VM instance view status code of the form
// "PowerState/<state>" (for example "PowerState/deallocated"). Callers
// compare the lower-cased state against the constants below.
const (
// VMPowerStatePrefix is the prefix of an instance view status code that
// carries a power state; the text after the prefix is the state itself.
VMPowerStatePrefix = "PowerState/"
// VMPowerStateStopped is the power state of a stopped VM. It is one of the
// states InstanceShutdownByProviderID treats as shut down.
VMPowerStateStopped = "stopped"
// VMPowerStateStopping is presumably the transitional state that precedes
// "stopped" — NOTE(review): confirm against the Azure power state list.
VMPowerStateStopping = "stopping"
// VMPowerStateDeallocated is the power state of a deallocated VM. It is one
// of the states InstanceShutdownByProviderID treats as shut down.
VMPowerStateDeallocated = "deallocated"
// VMPowerStateDeallocating is the power state of a VM that is being
// deallocated. InstanceShutdownByProviderID also treats it as shut down.
VMPowerStateDeallocating = "deallocating"
// VMPowerStateUnknown is returned by GetPowerStatusByNodeName when the
// VM's InstanceView is nil and no power state can be read.
VMPowerStateUnknown = "unknown"
)
8 changes: 1 addition & 7 deletions pkg/provider/azure_instances_v1.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,6 @@ import (
var _ cloudprovider.Instances = (*Cloud)(nil)

const (
vmPowerStatePrefix = "PowerState/"
vmPowerStateStopped = "stopped"
vmPowerStateDeallocated = "deallocated"
vmPowerStateDeallocating = "deallocating"
vmPowerStateUnknown = "unknown"

// nodeNameEnvironmentName is the environment variable name for getting node name.
// It is only used for out-of-tree cloud provider.
nodeNameEnvironmentName = "NODE_NAME"
Expand Down Expand Up @@ -266,7 +260,7 @@ func (az *Cloud) InstanceShutdownByProviderID(_ context.Context, providerID stri

status := strings.ToLower(powerStatus)
provisioningSucceeded := strings.EqualFold(strings.ToLower(provisioningState), strings.ToLower(string(consts.ProvisioningStateSucceeded)))
return provisioningSucceeded && (status == vmPowerStateStopped || status == vmPowerStateDeallocated || status == vmPowerStateDeallocating), nil
return provisioningSucceeded && (status == consts.VMPowerStateStopped || status == consts.VMPowerStateDeallocated || status == consts.VMPowerStateDeallocating), nil
}

func (az *Cloud) isCurrentInstance(name types.NodeName, metadataVMName string) (bool, error) {
Expand Down
14 changes: 5 additions & 9 deletions pkg/provider/azure_standard.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,12 @@ import (
cloudprovider "k8s.io/cloud-provider"
"k8s.io/klog/v2"
"k8s.io/utils/pointer"
"k8s.io/utils/ptr"

azcache "sigs.k8s.io/cloud-provider-azure/pkg/cache"
"sigs.k8s.io/cloud-provider-azure/pkg/consts"
"sigs.k8s.io/cloud-provider-azure/pkg/metrics"
vmutil "sigs.k8s.io/cloud-provider-azure/pkg/util/vm"
)

var (
Expand Down Expand Up @@ -501,19 +503,13 @@ func (as *availabilitySet) GetPowerStatusByNodeName(name string) (powerState str
return powerState, err
}

if vm.InstanceView != nil && vm.InstanceView.Statuses != nil {
statuses := *vm.InstanceView.Statuses
for _, status := range statuses {
state := pointer.StringDeref(status.Code, "")
if strings.HasPrefix(state, vmPowerStatePrefix) {
return strings.TrimPrefix(state, vmPowerStatePrefix), nil
}
}
if vm.InstanceView != nil {
return vmutil.GetVMPowerState(ptr.Deref(vm.Name, ""), vm.InstanceView.Statuses), nil
}

// vm.InstanceView or vm.InstanceView.Statuses is nil while the VM is being deleted.
klog.V(3).Infof("InstanceView for node %q is nil, assuming it's deleting", name)
return vmPowerStateUnknown, nil
return consts.VMPowerStateUnknown, nil
}

// GetProvisioningStateByNodeName returns the provisioningState for the specified node.
Expand Down
4 changes: 2 additions & 2 deletions pkg/provider/azure_standard_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1050,7 +1050,7 @@ func TestGetStandardVMPowerStatusByNodeName(t *testing.T) {
ProvisioningState: pointer.String("Succeeded"),
},
},
expectedStatus: vmPowerStateUnknown,
expectedStatus: consts.VMPowerStateUnknown,
},
{
name: "GetPowerStatusByNodeName should get vmPowerStateUnknown if vm.InstanceView.statuses is nil",
Expand All @@ -1062,7 +1062,7 @@ func TestGetStandardVMPowerStatusByNodeName(t *testing.T) {
InstanceView: &compute.VirtualMachineInstanceView{},
},
},
expectedStatus: vmPowerStateUnknown,
expectedStatus: consts.VMPowerStateUnknown,
},
}
for _, test := range testcases {
Expand Down
49 changes: 31 additions & 18 deletions pkg/provider/azure_vmss.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@ import (
cloudprovider "k8s.io/cloud-provider"
"k8s.io/klog/v2"
"k8s.io/utils/pointer"
"k8s.io/utils/ptr"

azcache "sigs.k8s.io/cloud-provider-azure/pkg/cache"
"sigs.k8s.io/cloud-provider-azure/pkg/consts"
"sigs.k8s.io/cloud-provider-azure/pkg/metrics"
"sigs.k8s.io/cloud-provider-azure/pkg/provider/virtualmachine"
vmutil "sigs.k8s.io/cloud-provider-azure/pkg/util/vm"
)

var (
Expand Down Expand Up @@ -282,20 +284,14 @@ func (ss *ScaleSet) GetPowerStatusByNodeName(name string) (powerState string, er

if vm.IsVirtualMachineScaleSetVM() {
v := vm.AsVirtualMachineScaleSetVM()
if v.InstanceView != nil && v.InstanceView.Statuses != nil {
statuses := *v.InstanceView.Statuses
for _, status := range statuses {
state := pointer.StringDeref(status.Code, "")
if strings.HasPrefix(state, vmPowerStatePrefix) {
return strings.TrimPrefix(state, vmPowerStatePrefix), nil
}
}
if v.InstanceView != nil {
return vmutil.GetVMPowerState(ptr.Deref(v.Name, ""), v.InstanceView.Statuses), nil
}
}

// vm.InstanceView or vm.InstanceView.Statuses is nil while the VM is being deleted.
klog.V(3).Infof("InstanceView for node %q is nil, assuming it's deleting", name)
return vmPowerStateUnknown, nil
return consts.VMPowerStateUnknown, nil
}

// GetProvisioningStateByNodeName returns the provisioningState for the specified node.
Expand Down Expand Up @@ -1029,6 +1025,8 @@ func getPrimaryIPConfigFromVMSSNetworkConfig(config *compute.VirtualMachineScale
// EnsureHostInPool ensures the given VM's Primary NIC's Primary IP Configuration is
// participating in the specified LoadBalancer Backend Pool, which returns (resourceGroup, vmasName, instanceID, vmssVM, error).
func (ss *ScaleSet) EnsureHostInPool(_ *v1.Service, nodeName types.NodeName, backendPoolID string, vmSetNameOfLB string) (string, string, string, *compute.VirtualMachineScaleSetVM, error) {
logger := klog.Background().WithName("EnsureHostInPool").
WithValues("nodeName", nodeName, "backendPoolID", backendPoolID, "vmSetNameOfLB", vmSetNameOfLB)
vmName := mapNodeNameToVMName(nodeName)
vm, err := ss.getVmssVM(vmName, azcache.CacheReadTypeDefault)
if err != nil {
Expand All @@ -1037,13 +1035,20 @@ func (ss *ScaleSet) EnsureHostInPool(_ *v1.Service, nodeName types.NodeName, bac
return "", "", "", nil, nil
}

klog.Errorf("EnsureHostInPool: failed to get VMSS VM %s: %v", vmName, err)
logger.Error(err, "failed to get vmss vm", "vmName", vmName)
if !errors.Is(err, ErrorNotVmssInstance) {
return "", "", "", nil, err
}
}
statuses := vm.GetInstanceViewStatus()
vmPowerState := vmutil.GetVMPowerState(vm.Name, statuses)
provisioningState := vm.GetProvisioningState()
if vmutil.IsNotActiveVMState(provisioningState, vmPowerState) {
logger.V(2).Info("skip updating the node because it is not in an active state", "vmName", vmName, "provisioningState", provisioningState, "vmPowerState", vmPowerState)
return "", "", "", nil, nil
}

klog.V(2).Infof("ensuring node %q of scaleset %q in LB backendpool %q", nodeName, vm.VMSSName, backendPoolID)
logger.V(2).Info("ensuring the vmss node in LB backendpool", "vmss name", vm.VMSSName)

// Check scale set name:
// - For basic SKU load balancer, return nil if the node's scale set is mismatched with vmSetNameOfLB.
Expand All @@ -1057,14 +1062,13 @@ func (ss *ScaleSet) EnsureHostInPool(_ *v1.Service, nodeName types.NodeName, bac
}

if vmSetNameOfLB != "" && needCheck && !strings.EqualFold(vmSetNameOfLB, vm.VMSSName) {
klog.V(3).Infof("EnsureHostInPool skips node %s because it is not in the ScaleSet %s", vmName, vmSetNameOfLB)
logger.V(3).Info("skips the node %s because it is not in the ScaleSet %s", vmName, vmSetNameOfLB)
return "", "", "", nil, nil
}

// Find primary network interface configuration.
if vm.VirtualMachineScaleSetVMProperties.NetworkProfileConfiguration.NetworkInterfaceConfigurations == nil {
klog.V(4).Infof("EnsureHostInPool: cannot obtain the primary network interface configuration, of vm %s, "+
"probably because the vm's being deleted", vmName)
logger.V(4).Info("cannot obtain the primary network interface configuration, of the vm, probably because the vm's being deleted", "vmName", vmName)
return "", "", "", nil, nil
}

Expand Down Expand Up @@ -1114,7 +1118,7 @@ func (ss *ScaleSet) EnsureHostInPool(_ *v1.Service, nodeName types.NodeName, bac
return "", "", "", nil, err
}
if !isSameLB {
klog.V(4).Infof("Node %q has already been added to LB %q, omit adding it to a new one", nodeName, oldLBName)
logger.V(4).Info("The node has already been added to an LB, omit adding it to a new one", "lbName", oldLBName)
return "", "", "", nil, nil
}
}
Expand Down Expand Up @@ -1221,7 +1225,7 @@ func (ss *ScaleSet) ensureVMSSInPool(_ *v1.Service, nodes []*v1.Node, backendPoo

// When vmss is being deleted, CreateOrUpdate API would report "the vmss is being deleted" error.
// Since it is being deleted, we shouldn't send more CreateOrUpdate requests for it.
if vmss.ProvisioningState != nil && strings.EqualFold(*vmss.ProvisioningState, consts.VirtualMachineScaleSetsDeallocating) {
if vmss.ProvisioningState != nil && strings.EqualFold(*vmss.ProvisioningState, consts.ProvisionStateDeleting) {
klog.V(3).Infof("ensureVMSSInPool: found vmss %s being deleted, skipping", vmssName)
continue
}
Expand Down Expand Up @@ -1493,6 +1497,7 @@ func (ss *ScaleSet) EnsureHostsInPool(service *v1.Service, nodes []*v1.Node, bac
// ensureBackendPoolDeletedFromNode ensures the loadBalancer backendAddressPools deleted
// from the specified node, which returns (resourceGroup, vmasName, instanceID, vmssVM, error).
func (ss *ScaleSet) ensureBackendPoolDeletedFromNode(nodeName string, backendPoolIDs []string) (string, string, string, *compute.VirtualMachineScaleSetVM, error) {
logger := klog.Background().WithName("ensureBackendPoolDeletedFromNode").WithValues("nodeName", nodeName, "backendPoolIDs", backendPoolIDs)
vm, err := ss.getVmssVM(nodeName, azcache.CacheReadTypeDefault)
if err != nil {
if errors.Is(err, cloudprovider.InstanceNotFound) {
Expand All @@ -1503,6 +1508,14 @@ func (ss *ScaleSet) ensureBackendPoolDeletedFromNode(nodeName string, backendPoo
return "", "", "", nil, err
}

statuses := vm.GetInstanceViewStatus()
vmPowerState := vmutil.GetVMPowerState(vm.Name, statuses)
provisioningState := vm.GetProvisioningState()
if vmutil.IsNotActiveVMState(provisioningState, vmPowerState) {
logger.V(2).Info("skip updating the node because it is not in an active state", "provisioningState", provisioningState, "vmPowerState", vmPowerState)
return "", "", "", nil, nil
}

// Find primary network interface configuration.
if vm.VirtualMachineScaleSetVMProperties.NetworkProfileConfiguration.NetworkInterfaceConfigurations == nil {
klog.V(4).Infof("ensureBackendPoolDeletedFromNode: cannot obtain the primary network interface configuration, of vm %s, "+
Expand Down Expand Up @@ -1679,7 +1692,7 @@ func (ss *ScaleSet) ensureBackendPoolDeletedFromVmssUniform(backendPoolIDs []str

// When vmss is being deleted, CreateOrUpdate API would report "the vmss is being deleted" error.
// Since it is being deleted, we shouldn't send more CreateOrUpdate requests for it.
if vmss.ProvisioningState != nil && strings.EqualFold(*vmss.ProvisioningState, consts.VirtualMachineScaleSetsDeallocating) {
if vmss.ProvisioningState != nil && strings.EqualFold(*vmss.ProvisioningState, consts.ProvisionStateDeleting) {
klog.V(3).Infof("ensureBackendPoolDeletedFromVMSS: found vmss %s being deleted, skipping", pointer.StringDeref(vmss.Name, ""))
return true
}
Expand Down Expand Up @@ -2086,7 +2099,7 @@ func (ss *ScaleSet) EnsureBackendPoolDeletedFromVMSets(vmssNamesMap map[string]b

// When vmss is being deleted, CreateOrUpdate API would report "the vmss is being deleted" error.
// Since it is being deleted, we shouldn't send more CreateOrUpdate requests for it.
if vmss.ProvisioningState != nil && strings.EqualFold(*vmss.ProvisioningState, consts.VirtualMachineScaleSetsDeallocating) {
if vmss.ProvisioningState != nil && strings.EqualFold(*vmss.ProvisioningState, consts.ProvisionStateDeleting) {
klog.V(3).Infof("EnsureBackendPoolDeletedFromVMSets: found vmss %s being deleted, skipping", vmssName)
continue
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/provider/azure_vmss_repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func (az *Cloud) CreateOrUpdateVMSS(resourceGroupName string, VMScaleSetName str
klog.Errorf("CreateOrUpdateVMSS: error getting vmss(%s): %v", VMScaleSetName, rerr)
return rerr
}
if vmss.ProvisioningState != nil && strings.EqualFold(*vmss.ProvisioningState, consts.VirtualMachineScaleSetsDeallocating) {
if vmss.ProvisioningState != nil && strings.EqualFold(*vmss.ProvisioningState, consts.ProvisionStateDeleting) {
klog.V(3).Infof("CreateOrUpdateVMSS: found vmss %s being deleted, skipping", VMScaleSetName)
return nil
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/provider/azure_vmss_repo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func TestCreateOrUpdateVMSS(t *testing.T) {
{
vmss: compute.VirtualMachineScaleSet{
VirtualMachineScaleSetProperties: &compute.VirtualMachineScaleSetProperties{
ProvisioningState: pointer.String(consts.VirtualMachineScaleSetsDeallocating),
ProvisioningState: pointer.String(consts.ProvisionStateDeleting),
},
},
},
Expand Down
29 changes: 26 additions & 3 deletions pkg/provider/azure_vmss_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"k8s.io/apimachinery/pkg/util/sets"
cloudprovider "k8s.io/cloud-provider"
"k8s.io/utils/pointer"
"k8s.io/utils/ptr"

"sigs.k8s.io/cloud-provider-azure/pkg/azureclients/interfaceclient/mockinterfaceclient"
"sigs.k8s.io/cloud-provider-azure/pkg/azureclients/publicipclient/mockpublicipclient"
Expand Down Expand Up @@ -791,7 +792,7 @@ func TestGetPowerStatusByNodeName(t *testing.T) {
description: "GetPowerStatusByNodeName should return vmPowerStateUnknown when the vm.InstanceView.Statuses is nil",
vmList: []string{"vmss-vm-000001"},
nilStatus: true,
expectedPowerState: vmPowerStateUnknown,
expectedPowerState: consts.VMPowerStateUnknown,
},
}

Expand Down Expand Up @@ -2096,6 +2097,7 @@ func TestEnsureHostInPool(t *testing.T) {
isBasicLB bool
isNilVMNetworkConfigs bool
isVMBeingDeleted bool
isVMNotActive bool
expectedNodeResourceGroup string
expectedVMSSName string
expectedInstanceID string
Expand Down Expand Up @@ -2179,6 +2181,11 @@ func TestEnsureHostInPool(t *testing.T) {
},
},
},
{
description: "EnsureHostInPool should skip if the current node is not active",
nodeName: "vmss-vm-000000",
isVMNotActive: true,
},
}

for _, test := range testCases {
Expand Down Expand Up @@ -2209,6 +2216,11 @@ func TestEnsureHostInPool(t *testing.T) {
if test.isNilVMNetworkConfigs {
expectedVMSSVMs[0].NetworkProfileConfiguration.NetworkInterfaceConfigurations = nil
}
if test.isVMNotActive {
(*expectedVMSSVMs[0].InstanceView.Statuses)[0] = compute.InstanceViewStatus{
Code: ptr.To("PowerState/deallocated"),
}
}
mockVMSSVMClient := ss.VirtualMachineScaleSetVMsClient.(*mockvmssvmclient.MockInterface)
mockVMSSVMClient.EXPECT().List(
gomock.Any(),
Expand Down Expand Up @@ -2438,7 +2450,7 @@ func TestEnsureVMSSInPool(t *testing.T) {

expectedVMSS := buildTestVMSSWithLB(testVMSSName, "vmss-vm-", []string{testLBBackendpoolID0}, test.setIPv6Config)
if test.isVMSSDeallocating {
expectedVMSS.ProvisioningState = pointer.String(consts.VirtualMachineScaleSetsDeallocating)
expectedVMSS.ProvisioningState = pointer.String(consts.ProvisionStateDeleting)
}
if test.isVMSSNilNICConfig {
expectedVMSS.VirtualMachineProfile.NetworkProfile.NetworkInterfaceConfigurations = nil
Expand Down Expand Up @@ -2568,6 +2580,7 @@ func TestEnsureBackendPoolDeletedFromNodeCommon(t *testing.T) {
nodeName string
backendpoolIDs []string
isNilVMNetworkConfigs bool
isVMNotActive bool
expectedNodeResourceGroup string
expectedVMSSName string
expectedInstanceID string
Expand Down Expand Up @@ -2630,6 +2643,11 @@ func TestEnsureBackendPoolDeletedFromNodeCommon(t *testing.T) {
},
},
},
{
description: "ensureBackendPoolDeletedFromNode should skip if the node is not active",
nodeName: "vmss-vm-000000",
isVMNotActive: true,
},
}

for _, test := range testCases {
Expand All @@ -2646,6 +2664,11 @@ func TestEnsureBackendPoolDeletedFromNodeCommon(t *testing.T) {
if test.isNilVMNetworkConfigs {
expectedVMSSVMs[0].NetworkProfileConfiguration.NetworkInterfaceConfigurations = nil
}
if test.isVMNotActive {
(*expectedVMSSVMs[0].InstanceView.Statuses)[0] = compute.InstanceViewStatus{
Code: ptr.To("PowerState/deallocated"),
}
}
mockVMSSVMClient := ss.VirtualMachineScaleSetVMsClient.(*mockvmssvmclient.MockInterface)
mockVMSSVMClient.EXPECT().List(gomock.Any(), ss.ResourceGroup, testVMSSName, gomock.Any()).Return(expectedVMSSVMs, nil).AnyTimes()

Expand Down Expand Up @@ -2726,7 +2749,7 @@ func TestEnsureBackendPoolDeletedFromVMSS(t *testing.T) {

expectedVMSS := buildTestVMSSWithLB(testVMSSName, "vmss-vm-", []string{testLBBackendpoolID0}, false)
if test.isVMSSDeallocating {
expectedVMSS.ProvisioningState = pointer.String(consts.VirtualMachineScaleSetsDeallocating)
expectedVMSS.ProvisioningState = pointer.String(consts.ProvisionStateDeleting)
}
if test.isVMSSNilNICConfig {
expectedVMSS.VirtualMachineProfile.NetworkProfile.NetworkInterfaceConfigurations = nil
Expand Down