Skip to content

Commit

Permalink
Perform reads and modifications to Shoot.Info in a concurrency safe w…
Browse files Browse the repository at this point in the history
…ay (gardener#4459)

* Ensure patch operations on b.Shoot.Info are performed on a copy

* Introduce GetInfo and UpdateInfo methods and replace naked usages of Info in production code

* Introduce SetInfo method and replace naked usages of Info in test code

* Reintroduce strategic merge where it was used previously

* Minor comment updates

* Use atomic.Value for info and remove read locking

* Address code review comments

* Avoid multiple GetInfo calls in helper methods.
  • Loading branch information
stoyanr authored and Kristiyan Gostev committed Apr 21, 2022
1 parent 9e2c361 commit 36fc410
Show file tree
Hide file tree
Showing 64 changed files with 538 additions and 477 deletions.
2 changes: 1 addition & 1 deletion pkg/gardenlet/controller/shoot/shoot_care_control.go
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ func retrieveSeedConditions(ctx context.Context, operation *operation.Operation)
}

seed := &gardencorev1beta1.Seed{}
if err := operation.K8sGardenClient.Client().Get(ctx, kutil.Key(operation.Shoot.Info.Name), seed); client.IgnoreNotFound(err) != nil {
if err := operation.K8sGardenClient.Client().Get(ctx, kutil.Key(operation.Shoot.GetInfo().Name), seed); client.IgnoreNotFound(err) != nil {
return nil, err
}
return seed.Status.Conditions, nil
Expand Down
12 changes: 6 additions & 6 deletions pkg/gardenlet/controller/shoot/shoot_care_control_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,15 +247,15 @@ var _ = Describe("Shoot Care Control", func() {
WithClientSetForKey(keys.ForSeedWithName(seedName), seedClientSet).
Build()

operationFunc = opFunc(&operation.Operation{
op := &operation.Operation{
K8sGardenClient: gardenClientSet,
K8sSeedClient: seedClientSet,
ManagedSeed: managedSeed,
Shoot: &operationshoot.Shoot{
Info: shoot,
},
Logger: logger.NewNopLogger().WithContext(context.Background()),
}, nil)
Shoot: &operationshoot.Shoot{},
Logger: logger.NewNopLogger().WithContext(context.Background()),
}
op.Shoot.SetInfo(shoot)
operationFunc = opFunc(op, nil)

revertFns = append(revertFns,
test.WithVar(&NewOperation, operationFunc),
Expand Down
36 changes: 18 additions & 18 deletions pkg/gardenlet/controller/shoot/shoot_control.go
Original file line number Diff line number Diff line change
Expand Up @@ -368,18 +368,18 @@ func (c *Controller) deleteShoot(ctx context.Context, logger *logrus.Entry, gard

// At this point the deletion is allowed, hence, check if the seed is up-to-date, then sync the Cluster resource
// initialize a new operation and, eventually, start the deletion flow.
if err := c.checkSeedAndSyncClusterResource(ctx, o.Shoot.Info, project, cloudProfile, seed); err != nil {
return patchShootStatusAndRequeueOnSyncError(ctx, gardenClient.Client(), o.Shoot.Info, logger, err)
if err := c.checkSeedAndSyncClusterResource(ctx, shoot, project, cloudProfile, seed); err != nil {
return patchShootStatusAndRequeueOnSyncError(ctx, gardenClient.Client(), shoot, logger, err)
}

if flowErr := c.runDeleteShootFlow(ctx, o); flowErr != nil {
c.recorder.Event(o.Shoot.Info, corev1.EventTypeWarning, gardencorev1beta1.EventDeleteError, flowErr.Description)
updateErr := c.patchShootStatusOperationError(ctx, gardenClient.Client(), o, o.Shoot.Info, flowErr.Description, operationType, flowErr.LastErrors...)
c.recorder.Event(shoot, corev1.EventTypeWarning, gardencorev1beta1.EventDeleteError, flowErr.Description)
updateErr := c.patchShootStatusOperationError(ctx, gardenClient.Client(), o, shoot, flowErr.Description, operationType, flowErr.LastErrors...)
return reconcile.Result{}, utilerrors.WithSuppressed(errors.New(flowErr.Description), updateErr)
}

c.recorder.Event(o.Shoot.Info, corev1.EventTypeNormal, gardencorev1beta1.EventDeleted, "Deleted Shoot cluster")
return c.finalizeShootDeletion(ctx, gardenClient, o.Shoot.Info, project.Name)
c.recorder.Event(shoot, corev1.EventTypeNormal, gardencorev1beta1.EventDeleted, "Deleted Shoot cluster")
return c.finalizeShootDeletion(ctx, gardenClient, shoot, project.Name)
}

func (c *Controller) finalizeShootDeletion(ctx context.Context, gardenClient kubernetes.Interface, shoot *gardencorev1beta1.Shoot, projectName string) (reconcile.Result, error) {
Expand Down Expand Up @@ -486,42 +486,42 @@ func (c *Controller) reconcileShoot(ctx context.Context, logger *logrus.Entry, g
}

// write UID to status when operation was created successfully once
if len(o.Shoot.Info.Status.UID) == 0 {
patch := client.MergeFrom(o.Shoot.Info.DeepCopy())
if len(shoot.Status.UID) == 0 {
patch := client.MergeFrom(shoot.DeepCopy())
shoot.Status.UID = shoot.UID
err := gardenClient.Client().Status().Patch(ctx, o.Shoot.Info, patch)
err := gardenClient.Client().Status().Patch(ctx, shoot, patch)
if err != nil {
return reconcile.Result{}, err
}
}

// At this point the reconciliation is allowed, hence, check if the seed is up-to-date, then sync the Cluster resource
// initialize a new operation and, eventually, start the reconciliation flow.
if err := c.checkSeedAndSyncClusterResource(ctx, o.Shoot.Info, project, cloudProfile, seed); err != nil {
return patchShootStatusAndRequeueOnSyncError(ctx, gardenClient.Client(), o.Shoot.Info, logger, err)
if err := c.checkSeedAndSyncClusterResource(ctx, shoot, project, cloudProfile, seed); err != nil {
return patchShootStatusAndRequeueOnSyncError(ctx, gardenClient.Client(), shoot, logger, err)
}

c.shootReconciliationDueTracker.off(key)

if flowErr := c.runReconcileShootFlow(ctx, o); flowErr != nil {
c.recorder.Event(o.Shoot.Info, corev1.EventTypeWarning, gardencorev1beta1.EventReconcileError, flowErr.Description)
updateErr := c.patchShootStatusOperationError(ctx, gardenClient.Client(), o, o.Shoot.Info, flowErr.Description, operationType, flowErr.LastErrors...)
c.recorder.Event(shoot, corev1.EventTypeWarning, gardencorev1beta1.EventReconcileError, flowErr.Description)
updateErr := c.patchShootStatusOperationError(ctx, gardenClient.Client(), o, shoot, flowErr.Description, operationType, flowErr.LastErrors...)
return reconcile.Result{}, utilerrors.WithSuppressed(errors.New(flowErr.Description), updateErr)
}

c.recorder.Event(o.Shoot.Info, corev1.EventTypeNormal, gardencorev1beta1.EventReconciled, "Reconciled Shoot cluster state")
if err := c.patchShootStatusOperationSuccess(ctx, gardenClient.Client(), o, o.Shoot.Info, o.Shoot.SeedNamespace, &o.Seed.Info.Name, operationType); err != nil {
c.recorder.Event(shoot, corev1.EventTypeNormal, gardencorev1beta1.EventReconciled, "Reconciled Shoot cluster state")
if err := c.patchShootStatusOperationSuccess(ctx, gardenClient.Client(), o, shoot, o.Shoot.SeedNamespace, &o.Seed.Info.Name, operationType); err != nil {
return reconcile.Result{}, err
}

if syncErr := c.syncClusterResourceToSeed(ctx, o.Shoot.Info, project, cloudProfile, seed); syncErr != nil {
if syncErr := c.syncClusterResourceToSeed(ctx, shoot, project, cloudProfile, seed); syncErr != nil {
logger.WithError(syncErr).Infof("Cluster resource sync to seed failed")
updateErr := c.patchShootStatusOperationError(ctx, gardenClient.Client(), o, o.Shoot.Info, syncErr.Error(), operationType, o.Shoot.Info.Status.LastErrors...)
updateErr := c.patchShootStatusOperationError(ctx, gardenClient.Client(), o, shoot, syncErr.Error(), operationType, shoot.Status.LastErrors...)
return reconcile.Result{}, utilerrors.WithSuppressed(syncErr, updateErr)
}

c.shootReconciliationDueTracker.on(key)
return c.scheduleNextSync(logger, o.Shoot.Info, false, "Reconciliation finished successfully"), nil
return c.scheduleNextSync(logger, shoot, false, "Reconciliation finished successfully"), nil
}

func (c *Controller) scheduleNextSync(logger logrus.FieldLogger, shoot *gardencorev1beta1.Shoot, regularReconciliationIsDue bool, reason string) reconcile.Result {
Expand Down
14 changes: 7 additions & 7 deletions pkg/gardenlet/controller/shoot/shoot_control_delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func (c *Controller) runDeleteShootFlow(ctx context.Context, o *operation.Operat
err error
)

for _, lastError := range o.Shoot.Info.Status.LastErrors {
for _, lastError := range o.Shoot.GetInfo().Status.LastErrors {
if lastError.TaskID != nil {
tasksWithErrors = append(tasksWithErrors, *lastError.TaskID)
}
Expand Down Expand Up @@ -158,7 +158,7 @@ func (c *Controller) runDeleteShootFlow(ctx context.Context, o *operation.Operat
cleanupShootResources = nonTerminatingNamespace && kubeAPIServerDeploymentFound && infrastructure != nil
defaultInterval = 5 * time.Second
defaultTimeout = 30 * time.Second
staticNodesCIDR = o.Shoot.Info.Spec.Networking.Nodes != nil
staticNodesCIDR = o.Shoot.GetInfo().Spec.Networking.Nodes != nil
useSNI = botanist.APIServerSNIEnabled()
sniPhase = botanist.Shoot.Components.ControlPlane.KubeAPIServerSNIPhase

Expand Down Expand Up @@ -351,7 +351,7 @@ func (c *Controller) runDeleteShootFlow(ctx context.Context, o *operation.Operat
})
cleanExtendedAPIs = g.Add(flow.Task{
Name: "Cleaning extended API groups",
Fn: flow.TaskFn(botanist.CleanExtendedAPIs).Timeout(10 * time.Minute).DoIf(cleanupShootResources && !metav1.HasAnnotation(o.Shoot.Info.ObjectMeta, v1beta1constants.AnnotationShootSkipCleanup)),
Fn: flow.TaskFn(botanist.CleanExtendedAPIs).Timeout(10 * time.Minute).DoIf(cleanupShootResources && !metav1.HasAnnotation(o.Shoot.GetInfo().ObjectMeta, v1beta1constants.AnnotationShootSkipCleanup)),
Dependencies: flow.NewTaskIDs(initializeShootClients, deleteClusterAutoscaler, waitForControllersToBeActive),
})

Expand Down Expand Up @@ -581,17 +581,17 @@ func (c *Controller) runDeleteShootFlow(ctx context.Context, o *operation.Operat
ErrorCleaner: o.CleanShootTaskErrorAndUpdateStatusLabel,
ErrorContext: errorContext,
}); err != nil {
o.Logger.Errorf("Error deleting Shoot %q: %+v", o.Shoot.Info.Name, err)
o.Logger.Errorf("Error deleting Shoot %q: %+v", o.Shoot.GetInfo().Name, err)
return gardencorev1beta1helper.NewWrappedLastErrors(gardencorev1beta1helper.FormatLastErrDescription(err), flow.Errors(err))
}

// ensure that shoot client is invalidated after it has been deleted
if err := o.ClientMap.InvalidateClient(keys.ForShoot(o.Shoot.Info)); err != nil {
if err := o.ClientMap.InvalidateClient(keys.ForShoot(o.Shoot.GetInfo())); err != nil {
err = fmt.Errorf("failed to invalidate shoot client: %w", err)
return gardencorev1beta1helper.NewWrappedLastErrors(gardencorev1beta1helper.FormatLastErrDescription(err), err)
}

o.Logger.Infof("Successfully deleted Shoot %q", o.Shoot.Info.Name)
o.Logger.Infof("Successfully deleted Shoot %q", o.Shoot.GetInfo().Name)
return nil
}

Expand Down Expand Up @@ -626,7 +626,7 @@ func (c *Controller) removeFinalizerFrom(ctx context.Context, gardenClient kuber
func needsControlPlaneDeployment(ctx context.Context, o *operation.Operation, kubeAPIServerDeploymentFound bool, infrastructure *extensionsv1alpha1.Infrastructure) (bool, error) {
var (
namespace = o.Shoot.SeedNamespace
name = o.Shoot.Info.Name
name = o.Shoot.GetInfo().Name
)

// If the `ControlPlane` resource and the kube-apiserver deployment do no longer exist then we don't want to re-deploy it.
Expand Down
10 changes: 5 additions & 5 deletions pkg/gardenlet/controller/shoot/shoot_control_migrate.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func (c *Controller) prepareShootForMigration(ctx context.Context, logger *logru

if flowErr := c.runPrepareShootControlPlaneMigration(o); flowErr != nil {
c.recorder.Event(shoot, corev1.EventTypeWarning, gardencorev1beta1.EventMigrationPreparationFailed, flowErr.Description)
updateErr := c.patchShootStatusOperationError(ctx, gardenClient.Client(), o, o.Shoot.Info, flowErr.Description, gardencorev1beta1.LastOperationTypeMigrate, flowErr.LastErrors...)
updateErr := c.patchShootStatusOperationError(ctx, gardenClient.Client(), o, shoot, flowErr.Description, gardencorev1beta1.LastOperationTypeMigrate, flowErr.LastErrors...)
return reconcile.Result{}, utilerrors.WithSuppressed(errors.New(flowErr.Description), updateErr)
}

Expand All @@ -86,7 +86,7 @@ func (c *Controller) runPrepareShootControlPlaneMigration(o *operation.Operation
kubeAPIServerDeploymentFound = true
)

for _, lastError := range o.Shoot.Info.Status.LastErrors {
for _, lastError := range o.Shoot.GetInfo().Status.LastErrors {
if lastError.TaskID != nil {
tasksWithErrors = append(tasksWithErrors, *lastError.TaskID)
}
Expand Down Expand Up @@ -145,7 +145,7 @@ func (c *Controller) runPrepareShootControlPlaneMigration(o *operation.Operation
var (
nonTerminatingNamespace = botanist.SeedNamespaceObject.Status.Phase != corev1.NamespaceTerminating
cleanupShootResources = nonTerminatingNamespace && kubeAPIServerDeploymentFound
wakeupRequired = (o.Shoot.Info.Status.IsHibernated || (!o.Shoot.Info.Status.IsHibernated && o.Shoot.HibernationEnabled)) && cleanupShootResources
wakeupRequired = (o.Shoot.GetInfo().Status.IsHibernated || o.Shoot.HibernationEnabled) && cleanupShootResources
defaultTimeout = 10 * time.Minute
defaultInterval = 5 * time.Second

Expand Down Expand Up @@ -295,11 +295,11 @@ func (c *Controller) runPrepareShootControlPlaneMigration(o *operation.Operation
ErrorContext: errorContext,
ErrorCleaner: o.CleanShootTaskErrorAndUpdateStatusLabel,
}); err != nil {
o.Logger.Errorf("Failed to prepare Shoot %q for migration: %+v", o.Shoot.Info.Name, err)
o.Logger.Errorf("Failed to prepare Shoot %q for migration: %+v", o.Shoot.GetInfo().Name, err)
return gardencorev1beta1helper.NewWrappedLastErrors(gardencorev1beta1helper.FormatLastErrDescription(err), flow.Errors(err))
}

o.Logger.Infof("Successfully prepared Shoot's control plane for migration %q", o.Shoot.Info.Name)
o.Logger.Infof("Successfully prepared Shoot's control plane for migration %q", o.Shoot.GetInfo().Name)
return nil
}

Expand Down
29 changes: 14 additions & 15 deletions pkg/gardenlet/controller/shoot/shoot_control_reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ import (
"github.com/gardener/gardener/pkg/utils/flow"
kutil "github.com/gardener/gardener/pkg/utils/kubernetes"
retryutils "github.com/gardener/gardener/pkg/utils/retry"

"sigs.k8s.io/controller-runtime/pkg/client"
)

// runReconcileShootFlow reconciles the Shoot cluster's state.
Expand All @@ -45,7 +43,7 @@ func (c *Controller) runReconcileShootFlow(ctx context.Context, o *operation.Ope
err error
)

for _, lastError := range o.Shoot.Info.Status.LastErrors {
for _, lastError := range o.Shoot.GetInfo().Status.LastErrors {
if lastError.TaskID != nil {
tasksWithErrors = append(tasksWithErrors, *lastError.TaskID)
}
Expand Down Expand Up @@ -82,11 +80,11 @@ func (c *Controller) runReconcileShootFlow(ctx context.Context, o *operation.Ope
defaultInterval = 5 * time.Second
dnsEnabled = !o.Shoot.DisableDNS
allowBackup = o.Seed.Info.Spec.Backup != nil
staticNodesCIDR = o.Shoot.Info.Spec.Networking.Nodes != nil
staticNodesCIDR = o.Shoot.GetInfo().Spec.Networking.Nodes != nil
useSNI = botanist.APIServerSNIEnabled()
generation = o.Shoot.Info.Generation
generation = o.Shoot.GetInfo().Generation
sniPhase = botanist.Shoot.Components.ControlPlane.KubeAPIServerSNIPhase
requestControlPlanePodsRestart = controllerutils.HasTask(o.Shoot.Info.Annotations, v1beta1constants.ShootTaskRestartControlPlanePods)
requestControlPlanePodsRestart = controllerutils.HasTask(o.Shoot.GetInfo().Annotations, v1beta1constants.ShootTaskRestartControlPlanePods)

g = flow.NewGraph("Shoot cluster reconciliation")
ensureShootStateExists = g.Add(flow.Task{
Expand Down Expand Up @@ -354,7 +352,7 @@ func (c *Controller) runReconcileShootFlow(ctx context.Context, o *operation.Ope
if err := botanist.DeployManagedResourceForAddons(ctx); err != nil {
return err
}
if controllerutils.HasTask(o.Shoot.Info.Annotations, v1beta1constants.ShootTaskRestartCoreAddons) {
if controllerutils.HasTask(o.Shoot.GetInfo().Annotations, v1beta1constants.ShootTaskRestartCoreAddons) {
return removeTaskAnnotation(ctx, o, generation, v1beta1constants.ShootTaskRestartCoreAddons)
}
return nil
Expand All @@ -378,7 +376,7 @@ func (c *Controller) runReconcileShootFlow(ctx context.Context, o *operation.Ope
})
nginxLBReady = g.Add(flow.Task{
Name: "Waiting until nginx ingress LoadBalancer is ready",
Fn: flow.TaskFn(botanist.WaitUntilNginxIngressServiceIsReady).DoIf(gardencorev1beta1helper.NginxIngressEnabled(botanist.Shoot.Info.Spec.Addons)).SkipIf(o.Shoot.HibernationEnabled),
Fn: flow.TaskFn(botanist.WaitUntilNginxIngressServiceIsReady).DoIf(gardencorev1beta1helper.NginxIngressEnabled(botanist.Shoot.GetInfo().Spec.Addons)).SkipIf(o.Shoot.HibernationEnabled),
Dependencies: flow.NewTaskIDs(deployManagedResourcesForAddons, initializeShootClients, waitUntilWorkerReady, ensureShootClusterIdentity),
})
deployIngressDomainDNSRecord = g.Add(flow.Task{
Expand Down Expand Up @@ -519,35 +517,36 @@ func (c *Controller) runReconcileShootFlow(ctx context.Context, o *operation.Ope
ErrorContext: errorContext,
ErrorCleaner: o.CleanShootTaskErrorAndUpdateStatusLabel,
}); err != nil {
o.Logger.Errorf("Failed to reconcile Shoot %q: %+v", o.Shoot.Info.Name, err)
o.Logger.Errorf("Failed to reconcile Shoot %q: %+v", o.Shoot.GetInfo().Name, err)
return gardencorev1beta1helper.NewWrappedLastErrors(gardencorev1beta1helper.FormatLastErrDescription(err), flow.Errors(err))
}

// ensure that shoot client is invalidated after it has been hibernated
if o.Shoot.HibernationEnabled {
if err := o.ClientMap.InvalidateClient(keys.ForShoot(o.Shoot.Info)); err != nil {
if err := o.ClientMap.InvalidateClient(keys.ForShoot(o.Shoot.GetInfo())); err != nil {
err = fmt.Errorf("failed to invalidate shoot client: %w", err)
return gardencorev1beta1helper.NewWrappedLastErrors(gardencorev1beta1helper.FormatLastErrDescription(err), err)
}
}

o.Logger.Infof("Successfully reconciled Shoot %q", o.Shoot.Info.Name)
o.Logger.Infof("Successfully reconciled Shoot %q", o.Shoot.GetInfo().Name)
return nil
}

func removeTaskAnnotation(ctx context.Context, o *operation.Operation, generation int64, tasksToRemove ...string) error {
// Check if shoot generation was changed mid-air, i.e., whether we need to wait for the next reconciliation until we
// can safely remove the task annotations to ensure all required tasks are executed.
shoot := &gardencorev1beta1.Shoot{}
if err := o.K8sGardenClient.APIReader().Get(ctx, kutil.Key(o.Shoot.Info.Namespace, o.Shoot.Info.Name), shoot); err != nil {
if err := o.K8sGardenClient.APIReader().Get(ctx, kutil.Key(o.Shoot.GetInfo().Namespace, o.Shoot.GetInfo().Name), shoot); err != nil {
return err
}

if shoot.Generation != generation {
return nil
}

oldObj := o.Shoot.Info.DeepCopy()
controllerutils.RemoveTasks(o.Shoot.Info.Annotations, tasksToRemove...)
return o.K8sGardenClient.Client().Patch(ctx, o.Shoot.Info, client.MergeFrom(oldObj))
return o.Shoot.UpdateInfo(ctx, o.K8sGardenClient.Client(), false, func(shoot *gardencorev1beta1.Shoot) error {
controllerutils.RemoveTasks(shoot.Annotations, tasksToRemove...)
return nil
})
}
Loading

0 comments on commit 36fc410

Please sign in to comment.