Merge pull request #1800 from devigned/fix-managedcluster-diff
fix managed cluster diff so aks added fields are omitted
k8s-ci-robot committed Nov 11, 2021
2 parents 27add62 + 6a9b3b9 commit 0ac1bef
Showing 23 changed files with 143 additions and 70 deletions.
5 changes: 4 additions & 1 deletion Makefile
@@ -52,6 +52,9 @@ KUBETEST_WINDOWS_CONF_PATH ?= $(abspath $(E2E_DATA_DIR)/kubetest/upstream-window
KUBETEST_REPO_LIST_PATH ?= $(abspath $(E2E_DATA_DIR)/kubetest/)
AZURE_TEMPLATES := $(E2E_DATA_DIR)/infrastructure-azure

# use the project local tool binaries first
export PATH := $(TOOLS_BIN_DIR):$(PATH)

# set --output-base used for conversion-gen which needs to be different for in GOPATH and outside GOPATH dev
ifneq ($(abspath $(ROOT_DIR)),$(GOPATH)/src/sigs.k8s.io/cluster-api-provider-azure)
OUTPUT_BASE := --output-base=$(ROOT_DIR)
@@ -169,7 +172,7 @@ test-cover: envs-test $(KUBECTL) $(KUBE_APISERVER) $(ETCD) ## Run tests with cod
go tool cover -html=coverage.out -o coverage.html

.PHONY: test-e2e-run
test-e2e-run: generate-e2e-templates $(ENVSUBST) $(KUBECTL) $(GINKGO) ## Run e2e tests
test-e2e-run: generate-e2e-templates $(ENVSUBST) $(KUSTOMIZE) $(KUBECTL) $(GINKGO) ## Run e2e tests
$(ENVSUBST) < $(E2E_CONF_FILE) > $(E2E_CONF_FILE_ENVSUBST) && \
$(GINKGO) -v -trace -tags=e2e -focus="$(GINKGO_FOCUS)" -skip="$(GINKGO_SKIP)" -nodes=$(GINKGO_NODES) --noColor=$(GINKGO_NOCOLOR) $(GINKGO_ARGS) ./test/e2e -- \
-e2e.artifacts-folder="$(ARTIFACTS)" \
2 changes: 1 addition & 1 deletion Tiltfile
@@ -19,7 +19,7 @@ settings = {
"capi_version": "v1.0.0",
"cert_manager_version": "v1.1.0",
"kubernetes_version": "v1.22.2",
"aks_kubernetes_version": "v1.20.5"
"aks_kubernetes_version": "v1.22.1"
}

keys = ["AZURE_SUBSCRIPTION_ID", "AZURE_TENANT_ID", "AZURE_CLIENT_SECRET", "AZURE_CLIENT_ID"]
2 changes: 1 addition & 1 deletion azure/scope/managedcontrolplane.go
@@ -424,7 +424,7 @@ func (s *ManagedControlPlaneScope) ManagedClusterSpec() (azure.ManagedClusterSpe

if s.ControlPlane.Spec.SKU != nil {
managedClusterSpec.SKU = &azure.SKU{
Tier: s.ControlPlane.Spec.SKU.Tier,
Tier: string(s.ControlPlane.Spec.SKU.Tier),
}
}

23 changes: 12 additions & 11 deletions azure/services/agentpools/agentpools.go
@@ -19,13 +19,12 @@ package agentpools
import (
"context"
"fmt"
"time"

"github.com/Azure/azure-sdk-for-go/services/containerservice/mgmt/2021-05-01/containerservice"
"github.com/go-logr/logr"
"github.com/google/go-cmp/cmp"
"github.com/pkg/errors"
"k8s.io/klog/v2"

infrav1alpha4 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
"sigs.k8s.io/cluster-api-provider-azure/azure"
"sigs.k8s.io/cluster-api-provider-azure/util/tele"
@@ -59,7 +58,7 @@ func New(scope ManagedMachinePoolScope) *Service {

// Reconcile idempotently creates or updates an agent pool, if possible.
func (s *Service) Reconcile(ctx context.Context) error {
ctx, _, done := tele.StartSpanWithLogger(
ctx, log, done := tele.StartSpanWithLogger(
ctx,
"agentpools.Service.Reconcile",
)
@@ -92,15 +91,17 @@ func (s *Service) Reconcile(ctx context.Context) error {
isCreate := azure.ResourceNotFound(err)
if isCreate {
err = s.Client.CreateOrUpdate(ctx, agentPoolSpec.ResourceGroup, agentPoolSpec.Cluster, agentPoolSpec.Name, profile)
if err != nil {
if err != nil && azure.ResourceNotFound(err) {
return azure.WithTransientError(errors.Wrap(err, "agent pool dependent resource does not exist yet"), 20*time.Second)
} else if err != nil {
return errors.Wrap(err, "failed to create or update agent pool")
}
} else {
ps := *existingPool.ManagedClusterAgentPoolProfileProperties.ProvisioningState
if ps != string(infrav1alpha4.Canceled) && ps != string(infrav1alpha4.Failed) && ps != string(infrav1alpha4.Succeeded) {
msg := fmt.Sprintf("Unable to update existing agent pool in non terminal state. Agent pool must be in one of the following provisioning states: canceled, failed, or succeeded. Actual state: %s", ps)
klog.V(2).Infof(msg)
return errors.New(msg)
log.V(2).Info(msg)
return azure.WithTransientError(errors.New(msg), 20*time.Second)
}

// Normalize individual agent pools to diff in case we need to update
@@ -123,13 +124,13 @@ func (s *Service) Reconcile(ctx context.Context) error {
// Diff and check if we require an update
diff := cmp.Diff(existingProfile, normalizedProfile)
if diff != "" {
klog.V(2).Infof("Update required (+new -old):\n%s", diff)
log.V(2).Info(fmt.Sprintf("Update required (+new -old):\n%s", diff))
err = s.Client.CreateOrUpdate(ctx, agentPoolSpec.ResourceGroup, agentPoolSpec.Cluster, agentPoolSpec.Name, profile)
if err != nil {
return errors.Wrap(err, "failed to create or update agent pool")
}
} else {
klog.V(2).Infof("Normalized and desired agent pool matched, no update needed")
log.V(2).Info("Normalized and desired agent pool matched, no update needed")
}
}

@@ -138,15 +139,15 @@ func (s *Service) Reconcile(ctx context.Context) error {

// Delete deletes the agent pool with the provided name.
func (s *Service) Delete(ctx context.Context) error {
ctx, _, done := tele.StartSpanWithLogger(
ctx, log, done := tele.StartSpanWithLogger(
ctx,
"agentpools.Service.Delete",
)
defer done()

agentPoolSpec := s.scope.AgentPoolSpec()

klog.V(2).Infof("deleting agent pool %s ", agentPoolSpec.Name)
log.V(2).Info(fmt.Sprintf("deleting agent pool %s ", agentPoolSpec.Name))
err := s.Client.Delete(ctx, agentPoolSpec.ResourceGroup, agentPoolSpec.Cluster, agentPoolSpec.Name)
if err != nil {
if azure.ResourceNotFound(err) {
@@ -156,6 +157,6 @@ func (s *Service) Delete(ctx context.Context) error {
return errors.Wrapf(err, "failed to delete agent pool %s in resource group %s", agentPoolSpec.Name, agentPoolSpec.ResourceGroup)
}

klog.V(2).Infof("Successfully deleted agent pool %s ", agentPoolSpec.Name)
log.V(2).Info(fmt.Sprintf("Successfully deleted agent pool %s ", agentPoolSpec.Name))
return nil
}
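The update gate above treats Canceled, Failed, and Succeeded as the only provisioning states in which AKS will accept a new update; anything else now surfaces as a transient error with a 20-second requeue instead of a hard failure. A minimal sketch of that check as a standalone helper (hypothetical name and function; the change performs the same comparison inline):

// isTerminalProvisioningState reports whether an AKS provisioning state allows a new update.
// Illustrative helper only; not part of this diff.
func isTerminalProvisioningState(state string) bool {
	switch state {
	case "Canceled", "Failed", "Succeeded":
		return true
	default:
		return false
	}
}

A caller would then express the gate as: if !isTerminalProvisioningState(ps) { return azure.WithTransientError(errors.New(msg), 20*time.Second) }.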
10 changes: 6 additions & 4 deletions azure/services/agentpools/agentpools_test.go
@@ -55,9 +55,10 @@ func TestReconcile(t *testing.T) {
provisioningStatesToTest: []string{"Canceled", "Succeeded", "Failed"},
expectedError: "",
expect: func(m *mock_agentpools.MockClientMockRecorder, provisioningstate string) {
pv := provisioningstate
m.CreateOrUpdate(gomockinternal.AContext(), "my-rg", "my-cluster", "my-agentpool", gomock.Any()).Return(nil)
m.Get(gomockinternal.AContext(), "my-rg", "my-cluster", "my-agentpool").Return(containerservice.AgentPool{ManagedClusterAgentPoolProfileProperties: &containerservice.ManagedClusterAgentPoolProfileProperties{
ProvisioningState: &provisioningstate,
ProvisioningState: &pv,
}}, nil)
},
},
@@ -69,7 +70,7 @@ func TestReconcile(t *testing.T) {
Name: "my-agentpool",
},
provisioningStatesToTest: []string{"Deleting", "InProgress", "randomStringHere"},
expectedError: "Unable to update existing agent pool in non terminal state. Agent pool must be in one of the following provisioning states: canceled, failed, or succeeded. Actual state: randomStringHere",
expectedError: "Unable to update existing agent pool in non terminal state. Agent pool must be in one of the following provisioning states: canceled, failed, or succeeded. Actual state:",
expect: func(m *mock_agentpools.MockClientMockRecorder, provisioningstate string) {
m.Get(gomockinternal.AContext(), "my-rg", "my-cluster", "my-agentpool").Return(containerservice.AgentPool{ManagedClusterAgentPoolProfileProperties: &containerservice.ManagedClusterAgentPoolProfileProperties{
ProvisioningState: &provisioningstate,
@@ -80,8 +81,9 @@ func TestReconcile(t *testing.T) {

for _, tc := range provisioningstatetestcases {
for _, provisioningstate := range tc.provisioningStatesToTest {
t.Logf("Testing agentpool provision state: " + provisioningstate)
tc := tc
provisioningstate := provisioningstate
t.Logf("Testing agentpool provision state: " + provisioningstate)
t.Run(tc.name, func(t *testing.T) {
g := NewWithT(t)
t.Parallel()
@@ -119,7 +121,7 @@ func TestReconcile(t *testing.T) {

err := s.Reconcile(context.TODO())
if tc.expectedError != "" {
g.Expect(err.Error()).To(HavePrefix(tc.expectedError))
g.Expect(err.Error()).To(ContainSubstring(tc.expectedError))
g.Expect(err.Error()).To(ContainSubstring(provisioningstate))
} else {
g.Expect(err).NotTo(HaveOccurred())
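The tc := tc and provisioningstate := provisioningstate shadowing shown here is the standard Go idiom for capturing a range variable before a parallel subtest: prior to Go 1.22, all iterations share a single loop variable, so parallel closures would otherwise all observe its final value. A generic illustration of the pattern (names are placeholders, not from this repository):

for _, tc := range cases {
	tc := tc // capture this iteration's value; parallel subtests would otherwise share one variable
	t.Run(tc.name, func(t *testing.T) {
		t.Parallel()
		_ = tc // tc now refers to this iteration's case, not the last element of cases
	})
}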
15 changes: 13 additions & 2 deletions azure/services/managedclusters/managedclusters.go
@@ -20,6 +20,7 @@ import (
"context"
"fmt"
"net"
"time"

"github.com/Azure/azure-sdk-for-go/services/containerservice/mgmt/2021-05-01/containerservice"
"github.com/Azure/go-autorest/autorest/to"
@@ -159,7 +160,7 @@ func (s *Service) Reconcile(ctx context.Context) error {
existingMC, err := s.Client.Get(ctx, managedClusterSpec.ResourceGroupName, managedClusterSpec.Name)
// Transient or other failure not due to 404
if err != nil && !azure.ResourceNotFound(err) {
return errors.Wrap(err, "failed to fetch existing managed cluster")
return azure.WithTransientError(errors.Wrap(err, "failed to fetch existing managed cluster"), 20*time.Second)
}

// We are creating this cluster for the first time.
@@ -300,7 +301,17 @@ func (s *Service) Reconcile(ctx context.Context) error {
if ps != string(infrav1alpha4.Canceled) && ps != string(infrav1alpha4.Failed) && ps != string(infrav1alpha4.Succeeded) {
msg := fmt.Sprintf("Unable to update existing managed cluster in non terminal state. Managed cluster must be in one of the following provisioning states: canceled, failed, or succeeded. Actual state: %s", ps)
klog.V(2).Infof(msg)
return errors.New(msg)
return azure.WithTransientError(errors.New(msg), 20*time.Second)
}

// Normalize the LoadBalancerProfile so the diff below doesn't get thrown off by AKS added properties.
if managedCluster.NetworkProfile.LoadBalancerProfile == nil {
// If our LoadBalancerProfile generated by the spec is nil, then don't worry about what AKS has added.
existingMC.NetworkProfile.LoadBalancerProfile = nil
} else {
// If our LoadBalancerProfile generated by the spec is not nil, then remove the effective outbound IPs from
// AKS.
existingMC.NetworkProfile.LoadBalancerProfile.EffectiveOutboundIPs = nil
}

diff := computeDiffOfNormalizedClusters(managedCluster, existingMC)
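Clearing the service-populated pieces of the LoadBalancerProfile before the comparison is what keeps the diff from reporting spurious drift: AKS writes EffectiveOutboundIPs back into the managed cluster, but the desired spec never sets it. A minimal go-cmp sketch of the idea, with hypothetical values standing in for the real ManagedCluster objects:

package main

import (
	"fmt"

	"github.com/google/go-cmp/cmp"
)

// lbProfile is a simplified stand-in for the AKS load balancer profile.
type lbProfile struct {
	ManagedOutboundIPCount int
	EffectiveOutboundIPs   []string // populated by AKS, never by the spec
}

func main() {
	desired := lbProfile{ManagedOutboundIPCount: 2}
	existing := lbProfile{ManagedOutboundIPCount: 2, EffectiveOutboundIPs: []string{"20.0.0.1", "20.0.0.2"}}

	// Normalize the existing object the same way the change above does.
	existing.EffectiveOutboundIPs = nil

	if diff := cmp.Diff(existing, desired); diff != "" {
		fmt.Printf("Update required (+new -old):\n%s", diff)
	} else {
		fmt.Println("Normalized and desired cluster matched, no update needed")
	}
}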
3 changes: 2 additions & 1 deletion azure/services/managedclusters/managedclusters_test.go
@@ -51,6 +51,7 @@ func TestReconcile(t *testing.T) {
m.Get(gomockinternal.AContext(), "my-rg", "my-managedcluster").Return(containerservice.ManagedCluster{ManagedClusterProperties: &containerservice.ManagedClusterProperties{
Fqdn: pointer.String("my-managedcluster-fqdn"),
ProvisioningState: &provisioningstate,
NetworkProfile: &containerservice.NetworkProfile{},
}}, nil)
m.GetCredentials(gomockinternal.AContext(), "my-rg", "my-managedcluster").Times(1)
s.ClusterName().AnyTimes().Return("my-managedcluster")
@@ -102,7 +103,7 @@ func TestReconcile(t *testing.T) {
err := s.Reconcile(context.TODO())
if tc.expectedError != "" {
g.Expect(err).To(HaveOccurred())
g.Expect(err.Error()).To(HavePrefix(tc.expectedError))
g.Expect(err.Error()).To(ContainSubstring(tc.expectedError))
g.Expect(err.Error()).To(ContainSubstring(provisioningstate))
} else {
g.Expect(err).NotTo(HaveOccurred())
@@ -740,7 +740,7 @@ spec:
of AzureManagedControlPlane.
properties:
initialized:
description: Initialized is true when the the control plane is available
description: Initialized is true when the control plane is available
for initial contact. This may occur before the control plane is
fully ready. In the AzureManagedControlPlane implementation, these
are identical.
4 changes: 2 additions & 2 deletions exp/api/v1alpha4/zz_generated.conversion.go

Some generated files are not rendered by default.

8 changes: 8 additions & 0 deletions exp/api/v1beta1/azuremanagedcontrolplane_default.go
@@ -73,3 +73,11 @@ func (r *AzureManagedControlPlane) setDefaultSubnet() {
r.Spec.VirtualNetwork.Subnet.CIDRBlock = defaultAKSNodeSubnetCIDR
}
}

func (r *AzureManagedControlPlane) setDefaultSku() {
if r.Spec.SKU == nil {
r.Spec.SKU = &SKU{
Tier: FreeManagedControlPlaneTier,
}
}
}
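setDefaultSku mirrors the other defaulters in this file: it only fills the SKU when the user left it unset, so an explicitly requested tier is never overwritten. A short sketch of the resulting behavior inside this package (not a runnable test; field and constant names are taken from this PR):

cp := &AzureManagedControlPlane{}
cp.setDefaultSku()
// cp.Spec.SKU is now &SKU{Tier: FreeManagedControlPlaneTier}

cp.Spec.SKU = &SKU{Tier: PaidManagedControlPlaneTier}
cp.setDefaultSku()
// the explicitly set tier is left untouched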
16 changes: 13 additions & 3 deletions exp/api/v1beta1/azuremanagedcontrolplane_types.go
@@ -122,11 +122,21 @@ type AADProfile struct {
AdminGroupObjectIDs []string `json:"adminGroupObjectIDs"`
}

// AzureManagedControlPlaneSkuTier - Tier of a managed cluster SKU.
// +kubebuilder:validation:Enum=Free;Paid
type AzureManagedControlPlaneSkuTier string

const (
// FreeManagedControlPlaneTier is the free tier of AKS without corresponding SLAs.
FreeManagedControlPlaneTier AzureManagedControlPlaneSkuTier = "Free"
// PaidManagedControlPlaneTier is the paid tier of AKS with corresponding SLAs.
PaidManagedControlPlaneTier AzureManagedControlPlaneSkuTier = "Paid"
)

// SKU - AKS SKU.
type SKU struct {
// Tier - Tier of a managed cluster SKU.
// +kubebuilder:validation:Enum=Free;Paid
Tier string `json:"tier"`
Tier AzureManagedControlPlaneSkuTier `json:"tier"`
}

// LoadBalancerProfile - Profile of the cluster load balancer.
@@ -194,7 +204,7 @@ type AzureManagedControlPlaneStatus struct {
// +optional
Ready bool `json:"ready,omitempty"`

// Initialized is true when the the control plane is available for initial contact.
// Initialized is true when the control plane is available for initial contact.
// This may occur before the control plane is fully ready.
// In the AzureManagedControlPlane implementation, these are identical.
// +optional
1 change: 1 addition & 0 deletions exp/api/v1beta1/azuremanagedcontrolplane_webhook.go
@@ -80,6 +80,7 @@ func (r *AzureManagedControlPlane) Default() {
r.setDefaultNodeResourceGroupName()
r.setDefaultVirtualNetwork()
r.setDefaultSubnet()
r.setDefaultSku()
}

// +kubebuilder:webhook:verbs=create;update,path=/validate-infrastructure-cluster-x-k8s-io-v1beta1-azuremanagedcontrolplane,mutating=false,failurePolicy=fail,groups=infrastructure.cluster.x-k8s.io,resources=azuremanagedcontrolplanes,versions=v1beta1,name=validation.azuremanagedcontrolplanes.infrastructure.cluster.x-k8s.io,sideEffects=None,admissionReviewVersions=v1;v1beta1
3 changes: 3 additions & 0 deletions exp/api/v1beta1/azuremanagedcontrolplane_webhook_test.go
@@ -49,6 +49,7 @@ func TestDefaultingWebhook(t *testing.T) {
g.Expect(amcp.Spec.NodeResourceGroupName).To(Equal("MC_fooRg_fooName_fooLocation"))
g.Expect(amcp.Spec.VirtualNetwork.Name).To(Equal("fooName"))
g.Expect(amcp.Spec.VirtualNetwork.Subnet.Name).To(Equal("fooName"))
g.Expect(amcp.Spec.SKU.Tier).To(Equal(FreeManagedControlPlaneTier))

t.Logf("Testing amcp defaulting webhook with baseline")
netPlug := "kubenet"
@@ -62,6 +63,7 @@ func TestDefaultingWebhook(t *testing.T) {
amcp.Spec.NodeResourceGroupName = "fooNodeRg"
amcp.Spec.VirtualNetwork.Name = "fooVnetName"
amcp.Spec.VirtualNetwork.Subnet.Name = "fooSubnetName"
amcp.Spec.SKU.Tier = PaidManagedControlPlaneTier
amcp.Default()
g.Expect(*amcp.Spec.NetworkPlugin).To(Equal(netPlug))
g.Expect(*amcp.Spec.LoadBalancerSKU).To(Equal(lbSKU))
@@ -71,6 +73,7 @@ func TestDefaultingWebhook(t *testing.T) {
g.Expect(amcp.Spec.NodeResourceGroupName).To(Equal("fooNodeRg"))
g.Expect(amcp.Spec.VirtualNetwork.Name).To(Equal("fooVnetName"))
g.Expect(amcp.Spec.VirtualNetwork.Subnet.Name).To(Equal("fooSubnetName"))
g.Expect(amcp.Spec.SKU.Tier).To(Equal(PaidManagedControlPlaneTier))
}

func TestValidatingWebhook(t *testing.T) {
36 changes: 27 additions & 9 deletions exp/controllers/azuremanagedcontrolplane_controller.go
@@ -26,6 +26,14 @@ import (
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/client-go/tools/record"
infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
"sigs.k8s.io/cluster-api-provider-azure/azure"
"sigs.k8s.io/cluster-api-provider-azure/azure/scope"
infracontroller "sigs.k8s.io/cluster-api-provider-azure/controllers"
infrav1exp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1beta1"
"sigs.k8s.io/cluster-api-provider-azure/pkg/coalescing"
"sigs.k8s.io/cluster-api-provider-azure/util/reconciler"
"sigs.k8s.io/cluster-api-provider-azure/util/tele"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
clusterv1exp "sigs.k8s.io/cluster-api/exp/api/v1beta1"
"sigs.k8s.io/cluster-api/util"
@@ -37,14 +45,6 @@ import (
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"

infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
"sigs.k8s.io/cluster-api-provider-azure/azure/scope"
infracontroller "sigs.k8s.io/cluster-api-provider-azure/controllers"
infrav1exp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1beta1"
"sigs.k8s.io/cluster-api-provider-azure/pkg/coalescing"
"sigs.k8s.io/cluster-api-provider-azure/util/reconciler"
"sigs.k8s.io/cluster-api-provider-azure/util/tele"
)

// AzureManagedControlPlaneReconciler reconciles an AzureManagedControlPlane object.
@@ -209,17 +209,35 @@ func (amcpr *AzureManagedControlPlaneReconciler) reconcileNormal(ctx context.Con
controllerutil.AddFinalizer(scope.ControlPlane, infrav1.ClusterFinalizer)
// Register the finalizer immediately to avoid orphaning Azure resources on delete
if err := scope.PatchObject(ctx); err != nil {
amcpr.Recorder.Eventf(scope.ControlPlane, corev1.EventTypeWarning, "AzureManagedControlPlane unavailable", "failed to patch resource: %s", err)
return reconcile.Result{}, err
}

if err := newAzureManagedControlPlaneReconciler(scope).Reconcile(ctx); err != nil {
// Handle transient and terminal errors
log := scope.WithValues("name", scope.ControlPlane.Name, "namespace", scope.ControlPlane.Namespace)
var reconcileError azure.ReconcileError
if errors.As(err, &reconcileError) {
if reconcileError.IsTerminal() {
log.Error(err, "failed to reconcile AzureManagedControlPlane")
return reconcile.Result{}, nil
}

if reconcileError.IsTransient() {
log.V(4).Info("requeuing due to transient failure", "error", err)
return reconcile.Result{RequeueAfter: reconcileError.RequeueAfter()}, nil
}

return reconcile.Result{}, errors.Wrap(err, "failed to reconcile AzureManagedControlPlane")
}

return reconcile.Result{}, errors.Wrapf(err, "error creating AzureManagedControlPlane %s/%s", scope.ControlPlane.Namespace, scope.ControlPlane.Name)
}

// No errors, so mark us ready so the Cluster API Cluster Controller can pull it
scope.ControlPlane.Status.Ready = true
scope.ControlPlane.Status.Initialized = true

amcpr.Recorder.Event(scope.ControlPlane, corev1.EventTypeNormal, "AzureManagedControlPlane available", "successfully reconciled")
return reconcile.Result{}, nil
}
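The branching above relies on azure.ReconcileError, whose implementation is not part of this diff: WithTransientError in the services wraps an error together with a requeue interval, and the controller reads it back through IsTransient and RequeueAfter. A hypothetical, simplified shape of such an error type, purely to illustrate the contract (the real type in the azure package may differ):

// transientError is an illustrative stand-in for the provider's transient ReconcileError.
type transientError struct {
	err          error
	requeueAfter time.Duration
}

func (e transientError) Error() string               { return e.err.Error() }
func (e transientError) IsTransient() bool           { return true }
func (e transientError) IsTerminal() bool            { return false }
func (e transientError) RequeueAfter() time.Duration { return e.requeueAfter }

// withTransientError mirrors the call shape of azure.WithTransientError used in the services above.
func withTransientError(err error, requeueAfter time.Duration) error {
	return transientError{err: err, requeueAfter: requeueAfter}
}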

