Skip to content

Commit

Permalink
Make availability set reconcile/delete async
Browse files Browse the repository at this point in the history
  • Loading branch information
Jont828 committed Dec 1, 2021
1 parent 070d46f commit 1c20edc
Show file tree
Hide file tree
Showing 9 changed files with 548 additions and 295 deletions.
64 changes: 60 additions & 4 deletions azure/scope/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@ import (
"context"
"encoding/base64"
"encoding/json"
"strconv"
"strings"
"time"

"github.com/Azure/azure-sdk-for-go/profiles/latest/compute/mgmt/compute"
"github.com/Azure/go-autorest/autorest/to"
"github.com/go-logr/logr"
"github.com/pkg/errors"
Expand All @@ -39,6 +41,7 @@ import (

infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
"sigs.k8s.io/cluster-api-provider-azure/azure"
"sigs.k8s.io/cluster-api-provider-azure/azure/services/availabilitysets"
"sigs.k8s.io/cluster-api-provider-azure/azure/services/disks"
"sigs.k8s.io/cluster-api-provider-azure/azure/services/resourceskus"
"sigs.k8s.io/cluster-api-provider-azure/azure/services/virtualmachines"
Expand Down Expand Up @@ -102,9 +105,10 @@ type MachineScope struct {

// MachineCache stores common machine information so we don't have to hit the API multiple times within the same reconcile loop.
type MachineCache struct {
BootstrapData string
VMImage *infrav1.Image
VMSKU resourceskus.SKU
BootstrapData string
VMImage *infrav1.Image
VMSKU resourceskus.SKU
availabilitySetSKU resourceskus.SKU
}

// InitMachineCache sets cached information about the machine to be used in the scope.
Expand All @@ -130,9 +134,16 @@ func (m *MachineScope) InitMachineCache(ctx context.Context) error {
if err != nil {
return err
}

m.cache.VMSKU, err = skuCache.Get(ctx, m.AzureMachine.Spec.VMSize, resourceskus.VirtualMachines)
if err != nil {
return azure.WithTerminalError(errors.Wrapf(err, "failed to get SKU %s in compute api", m.AzureMachine.Spec.VMSize))
return azure.WithTerminalError(errors.Wrapf(err, "failed to get VM SKU %s in compute api", m.AzureMachine.Spec.VMSize))
}

m.cache.availabilitySetSKU, err = skuCache.Get(ctx, string(compute.AvailabilitySetSkuTypesAligned), resourceskus.AvailabilitySets)
if err != nil {
// TODO: verify error message
return azure.WithTerminalError(errors.Wrapf(err, "failed to get availability set SKU %s in compute api", string(compute.AvailabilitySetSkuTypesAligned)))
}
}

Expand Down Expand Up @@ -371,6 +382,51 @@ func (m *MachineScope) ProviderID() string {
return parsed.String()
}

// AvailabilitySet returns the availability set for this machine if available.
func (m *MachineScope) AvailabilitySetSpec() (azure.ResourceSpecGetter, error) {
if !m.AvailabilitySetEnabled() {
// Availability set not enabled, not an error
return nil, nil
}
spec := &availabilitysets.AvailabilitySetSpec{
ResourceGroup: m.ResourceGroup(),
ClusterName: m.ClusterName(),
Location: m.Location(),
AdditionalTags: m.AdditionalTags(),
}

if m.IsControlPlane() {
spec.Name = azure.GenerateAvailabilitySetName(m.ClusterName(), azure.ControlPlaneNodeGroup)
} else if mdName, ok := m.Machine.Labels[clusterv1.MachineDeploymentLabelName]; ok {
// get machine deployment name from labels for machines that maybe part of a machine deployment.
spec.Name = azure.GenerateAvailabilitySetName(m.ClusterName(), mdName)
} else if msName, ok := m.Machine.Labels[clusterv1.MachineSetLabelName]; ok {
// if machine deployment name label is not available, use machine set name.
spec.Name = azure.GenerateAvailabilitySetName(m.ClusterName(), msName)
} else {
// Name not available, not an error
return nil, nil
}

if m.cache != nil {
asSKU := m.cache.availabilitySetSKU
faultDomainCountStr, ok := asSKU.GetCapability(resourceskus.MaximumPlatformFaultDomainCount)
if !ok {
return nil, errors.Errorf("cannot find capability %s sku %s", resourceskus.MaximumPlatformFaultDomainCount, asSKU.Name)
}

faultDomainCount, err := strconv.ParseUint(faultDomainCountStr, 10, 32)
if err != nil {
return nil, errors.Wrap(err, "failed to determine max fault domain count")
}

spec.FaultDomainCount = faultDomainCount
return spec, nil
} else {
return nil, errors.Errorf("cache not initialized for availability set")
}
}

// AvailabilitySet returns the availability set for this machine if available.
func (m *MachineScope) AvailabilitySet() (string, bool) {
if !m.AvailabilitySetEnabled() {
Expand Down
118 changes: 41 additions & 77 deletions azure/services/availabilitysets/availabilitysets.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,137 +18,101 @@ package availabilitysets

import (
"context"
"strconv"

"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2021-04-01/compute"
"github.com/Azure/go-autorest/autorest/to"
"github.com/go-logr/logr"
"github.com/pkg/errors"

infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
"sigs.k8s.io/cluster-api-provider-azure/azure"
"sigs.k8s.io/cluster-api-provider-azure/azure/converters"
"sigs.k8s.io/cluster-api-provider-azure/azure/services/async"
"sigs.k8s.io/cluster-api-provider-azure/azure/services/resourceskus"
"sigs.k8s.io/cluster-api-provider-azure/util/reconciler"
"sigs.k8s.io/cluster-api-provider-azure/util/tele"
)

const serviceName = "availabilitysets"

// AvailabilitySetScope defines the scope interface for a availability sets service.
type AvailabilitySetScope interface {
logr.Logger
azure.ClusterDescriber
AvailabilitySet() (string, bool)
azure.AsyncStatusUpdater
AvailabilitySetSpec() (azure.ResourceSpecGetter, error)
}

// Service provides operations on Azure resources.
type Service struct {
Scope AvailabilitySetScope
Client
async.Reconciler
resourceSKUCache *resourceskus.Cache
}

// New creates a new availability sets service.
func New(scope AvailabilitySetScope, skuCache *resourceskus.Cache) *Service {
Client := NewClient(scope)
return &Service{
Scope: scope,
Client: NewClient(scope),
Client: Client,
resourceSKUCache: skuCache,
Reconciler: async.New(scope, Client, Client),
}
}

// Reconcile creates or updates availability sets.
func (s *Service) Reconcile(ctx context.Context) error {
ctx, _, done := tele.StartSpanWithLogger(
ctx,
"availabilitysets.Service.Reconcile",
)
ctx, _, done := tele.StartSpanWithLogger(ctx, "availabilitysets.Service.Reconcile")
defer done()

availabilitySetName, ok := s.Scope.AvailabilitySet()
if !ok {
return nil
}

asSku, err := s.resourceSKUCache.Get(ctx, string(compute.AvailabilitySetSkuTypesAligned), resourceskus.AvailabilitySets)
if err != nil {
return errors.Wrap(err, "failed to get availability sets sku")
}

faultDomainCountStr, ok := asSku.GetCapability(resourceskus.MaximumPlatformFaultDomainCount)
if !ok {
return errors.Errorf("cannot find capability %s sku %s", resourceskus.MaximumPlatformFaultDomainCount, *asSku.Name)
}

faultDomainCount, err := strconv.ParseUint(faultDomainCountStr, 10, 32)
if err != nil {
return errors.Wrap(err, "failed to determine max fault domain count")
}

s.Scope.V(2).Info("creating availability set", "availability set", availabilitySetName)

asParams := compute.AvailabilitySet{
Sku: &compute.Sku{
Name: to.StringPtr(string(compute.AvailabilitySetSkuTypesAligned)),
},
AvailabilitySetProperties: &compute.AvailabilitySetProperties{
PlatformFaultDomainCount: to.Int32Ptr(int32(faultDomainCount)),
},
Tags: converters.TagsToMap(infrav1.Build(infrav1.BuildParams{
ClusterName: s.Scope.ClusterName(),
Lifecycle: infrav1.ResourceLifecycleOwned,
Name: to.StringPtr(availabilitySetName),
Role: to.StringPtr(infrav1.CommonRole),
Additional: s.Scope.AdditionalTags(),
})),
Location: to.StringPtr(s.Scope.Location()),
}
ctx, cancel := context.WithTimeout(ctx, reconciler.DefaultAzureServiceReconcileTimeout)
defer cancel()

_, err = s.Client.CreateOrUpdate(ctx, s.Scope.ResourceGroup(), availabilitySetName, asParams)
setSpec, err := s.Scope.AvailabilitySetSpec()
if err != nil {
return errors.Wrapf(err, "failed to create availability set %s", availabilitySetName)
s.Scope.UpdatePutStatus(infrav1.AvailabilitySetReadyCondition, serviceName, err)
return err
}

s.Scope.V(2).Info("successfully created availability set", "availability set", availabilitySetName)

return nil
_, err = s.CreateResource(ctx, setSpec, serviceName)
s.Scope.UpdatePutStatus(infrav1.AvailabilitySetReadyCondition, serviceName, err)
return err
}

// Delete deletes availability sets.
func (s *Service) Delete(ctx context.Context) error {
ctx, _, done := tele.StartSpanWithLogger(ctx, "availabilitysets.Service.Delete")

ctx, _, done := tele.StartSpanWithLogger(ctx, "virtualmachines.Service.Delete")
defer done()

availabilitySetName, ok := s.Scope.AvailabilitySet()
if !ok {
return nil
}
ctx, cancel := context.WithTimeout(ctx, reconciler.DefaultAzureServiceReconcileTimeout)
defer cancel()

as, err := s.Client.Get(ctx, s.Scope.ResourceGroup(), availabilitySetName)
if err != nil && azure.ResourceNotFound(err) {
// already deleted
return nil
setSpec, err := s.Scope.AvailabilitySetSpec()
if err != nil {
s.Scope.UpdateDeleteStatus(infrav1.AvailabilitySetReadyCondition, serviceName, err)
return err
}

existingSet, err := s.Client.Get(ctx, setSpec)
if err != nil {
return errors.Wrapf(err, "failed to get availability set %s in resource group %s", availabilitySetName, s.Scope.ResourceGroup())
if azure.ResourceNotFound(err) {
// already deleted
return nil
}
return errors.Wrapf(err, "failed to get availability set %s in resource group %s", setSpec.ResourceName(), setSpec.ResourceGroupName())
}

// only delete when the availability set does not have any vms
if as.AvailabilitySetProperties != nil && as.VirtualMachines != nil && len(*as.VirtualMachines) > 0 {
return nil
availabilitySet, ok := existingSet.(compute.AvailabilitySet)
if !ok {
return errors.Errorf("%T is not a compute.AvailabilitySet", existingSet)
}

s.Scope.V(2).Info("deleting availability set", "availability set", availabilitySetName)
err = s.Client.Delete(ctx, s.Scope.ResourceGroup(), availabilitySetName)
if err != nil && azure.ResourceNotFound(err) {
// already deleted
// only delete when the availability set does not have any vms
if availabilitySet.AvailabilitySetProperties != nil && availabilitySet.VirtualMachines != nil && len(*availabilitySet.VirtualMachines) > 0 {
return nil
}

if err != nil {
return errors.Wrapf(err, "failed to delete availability set %s in resource group %s", availabilitySetName, s.Scope.ResourceGroup())
}

s.Scope.V(2).Info("successfully delete availability set", "availability set", availabilitySetName)

return nil
err = s.DeleteResource(ctx, setSpec, serviceName)
s.Scope.UpdateDeleteStatus(infrav1.VMRunningCondition, serviceName, err)
return err
}
Loading

0 comments on commit 1c20edc

Please sign in to comment.