Skip to content

Commit

Permalink
Fix finalizer processing
Browse files Browse the repository at this point in the history
There are two reconcilers for the CSV, one is controller-runtime based,
the other is based on queueinformer. A finalizer was added to the CSV
and it's handled by the controller-runtime reconciler.

However, the queueinformer-based reconciler may take a while to do its
processing such that the CSV may be deleted and the finalizer run via
the controller-runtime reconciler. (This still takes <1s.)

This causes problems when CSV being synced is deleted. The queueinformer
attempts to sync RBAC to the stale CSV. The proper (BIG/risky) fix is to
consolidate the two reconcilers. A less risky fix is to query the lister
cache to see if the CSV has been deleted (or has a DeletionTimestamp),
and not do the RBAC updates if that's the case.

Signed-off-by: Todd Short <todd.short@me.com>
  • Loading branch information
tmshort committed Feb 26, 2024
1 parent 9f42a6f commit 127d7ed
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 120 deletions.
112 changes: 111 additions & 1 deletion pkg/controller/operators/olm/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ import (
apiregistrationv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1"
kagg "k8s.io/kube-aggregator/pkg/client/informers/externalversions"
utilclock "k8s.io/utils/clock"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"

operatorsv1 "github.com/operator-framework/api/pkg/operators/v1"
"github.com/operator-framework/api/pkg/operators/v1alpha1"
Expand Down Expand Up @@ -1093,6 +1095,9 @@ func (a *Operator) handleClusterServiceVersionDeletion(obj interface{}) {
"phase": clusterServiceVersion.Status.Phase,
})

logger.Debug("start deleting CSV")
defer logger.Debug("end deleting CSV")

metrics.DeleteCSVMetric(clusterServiceVersion)

if clusterServiceVersion.IsCopied() {
Expand Down Expand Up @@ -1277,6 +1282,96 @@ func (a *Operator) deleteChild(csv *metav1.PartialObjectMetadata, logger *logrus
return a.client.OperatorsV1alpha1().ClusterServiceVersions(csv.GetNamespace()).Delete(context.TODO(), csv.GetName(), metav1.DeleteOptions{})
}

// Return values, err, ok; ok == true: continue Reconcile, ok == false: exit Reconcile
func (a *Operator) processFinalizer(csv *v1alpha1.ClusterServiceVersion, log *logrus.Entry) (error, bool) {
myFinalizerName := "operators.coreos.com/csv-cleanup"

if csv.ObjectMeta.DeletionTimestamp.IsZero() {
// CSV is not being deleted, add finalizer if not present
if !controllerutil.ContainsFinalizer(csv, myFinalizerName) {
controllerutil.AddFinalizer(csv, myFinalizerName)
_, err := a.client.OperatorsV1alpha1().ClusterServiceVersions(csv.GetNamespace()).Update(a.ctx, csv, metav1.UpdateOptions{})
if err != nil {
log.WithError(err).Error("Adding finalizer")
return err, false
}
}
return nil, true
}

if !controllerutil.ContainsFinalizer(csv, myFinalizerName) {
// Finalizer has been removed; stop reconciliation as the CSV is being deleted
return nil, false
}

log.Info("started finalizer")
defer log.Info("completed finalizer")

// CSV is being deleted and the finalizer still present; do any clean up
ownerSelector := ownerutil.CSVOwnerSelector(csv)
listOptions := metav1.ListOptions{
LabelSelector: ownerSelector.String(),
}
deleteOptions := metav1.DeleteOptions{}
// Look for resources owned by this CSV, and delete them.
log.WithFields(logrus.Fields{"selector": ownerSelector}).Info("Cleaning up resources after CSV deletion")
var errs []error

err := a.opClient.KubernetesInterface().RbacV1().ClusterRoleBindings().DeleteCollection(a.ctx, deleteOptions, listOptions)
if client.IgnoreNotFound(err) != nil {
log.WithError(err).Error("Deleting ClusterRoleBindings on CSV delete")
errs = append(errs, err)
}

err = a.opClient.KubernetesInterface().RbacV1().ClusterRoles().DeleteCollection(a.ctx, deleteOptions, listOptions)
if client.IgnoreNotFound(err) != nil {
log.WithError(err).Error("Deleting ClusterRoles on CSV delete")
errs = append(errs, err)
}
err = a.opClient.KubernetesInterface().AdmissionregistrationV1().MutatingWebhookConfigurations().DeleteCollection(a.ctx, deleteOptions, listOptions)
if client.IgnoreNotFound(err) != nil {
log.WithError(err).Error("Deleting MutatingWebhookConfigurations on CSV delete")
errs = append(errs, err)
}

err = a.opClient.KubernetesInterface().AdmissionregistrationV1().ValidatingWebhookConfigurations().DeleteCollection(a.ctx, deleteOptions, listOptions)
if client.IgnoreNotFound(err) != nil {
log.WithError(err).Error("Deleting ValidatingWebhookConfigurations on CSV delete")
errs = append(errs, err)
}

// Make sure things are deleted
crbList, err := a.lister.RbacV1().ClusterRoleBindingLister().List(ownerSelector)
if err != nil {
errs = append(errs, err)
} else if len(crbList) != 0 {
errs = append(errs, fmt.Errorf("waiting for ClusterRoleBindings to delete"))
}

crList, err := a.lister.RbacV1().ClusterRoleLister().List(ownerSelector)
if err != nil {
errs = append(errs, err)
} else if len(crList) != 0 {
errs = append(errs, fmt.Errorf("waiting for ClusterRoles to delete"))
}

// Return any errors
if err := utilerrors.NewAggregate(errs); err != nil {
return err, false
}

// If no errors, remove our finalizer from the CSV and update
controllerutil.RemoveFinalizer(csv, myFinalizerName)
_, err = a.client.OperatorsV1alpha1().ClusterServiceVersions(csv.GetNamespace()).Update(a.ctx, csv, metav1.UpdateOptions{})
if err != nil {
log.WithError(err).Error("Removing finalizer")
return err, false
}

// Stop reconciliation as the csv is being deleted
return nil, false
}

// syncClusterServiceVersion is the method that gets called when we see a CSV event in the cluster
func (a *Operator) syncClusterServiceVersion(obj interface{}) (syncError error) {
clusterServiceVersion, ok := obj.(*v1alpha1.ClusterServiceVersion)
Expand All @@ -1291,7 +1386,22 @@ func (a *Operator) syncClusterServiceVersion(obj interface{}) (syncError error)
"namespace": clusterServiceVersion.GetNamespace(),
"phase": clusterServiceVersion.Status.Phase,
})
logger.Debug("syncing CSV")
logger.Debug("start syncing CSV")
defer logger.Debug("end syncing CSV")

// get an up-to-date clusterServiceVersion from the cache
clusterServiceVersion, err := a.lister.OperatorsV1alpha1().ClusterServiceVersionLister().ClusterServiceVersions(clusterServiceVersion.GetNamespace()).Get(clusterServiceVersion.GetName())
if apierrors.IsNotFound(err) {
logger.Info("CSV has beeen deleted")
return nil
} else if err != nil {
logger.Info("Error getting latest version of CSV")
return err
}

if err, ok := a.processFinalizer(clusterServiceVersion, logger); !ok {
return err
}

if a.csvNotification != nil {
a.csvNotification.OnAddOrUpdate(clusterServiceVersion)
Expand Down
118 changes: 0 additions & 118 deletions pkg/controller/operators/operatorconditiongenerator_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,15 @@ package operators

import (
"context"
"fmt"
"reflect"

"github.com/go-logr/logr"
admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
rbacv1 "k8s.io/api/rbac/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/predicate"
Expand Down Expand Up @@ -100,10 +95,6 @@ func (r *OperatorConditionGeneratorReconciler) Reconcile(ctx context.Context, re
return ctrl.Result{}, client.IgnoreNotFound(err)
}

if err, ok := r.processFinalizer(ctx, in); !ok {
return ctrl.Result{}, err
}

operatorCondition := &operatorsv2.OperatorCondition{
ObjectMeta: metav1.ObjectMeta{
// For now, only generate an OperatorCondition with the same name as the csv.
Expand Down Expand Up @@ -179,112 +170,3 @@ func (r *OperatorConditionGeneratorReconciler) ensureOperatorCondition(operatorC
existingOperatorCondition.Spec.ServiceAccounts = operatorCondition.Spec.ServiceAccounts
return r.Client.Update(context.TODO(), existingOperatorCondition)
}

// Return values, err, ok; ok == true: continue Reconcile, ok == false: exit Reconcile
func (r *OperatorConditionGeneratorReconciler) processFinalizer(ctx context.Context, csv *operatorsv1alpha1.ClusterServiceVersion) (error, bool) {
myFinalizerName := "operators.coreos.com/csv-cleanup"
log := r.log.WithValues("name", csv.GetName()).WithValues("namespace", csv.GetNamespace())

if csv.ObjectMeta.DeletionTimestamp.IsZero() {
// CSV is not being deleted, add finalizer if not present
if !controllerutil.ContainsFinalizer(csv, myFinalizerName) {
patch := csv.DeepCopy()
controllerutil.AddFinalizer(patch, myFinalizerName)
if err := r.Client.Patch(ctx, patch, client.MergeFrom(csv)); err != nil {
log.Error(err, "Adding finalizer")
return err, false
}
}
return nil, true
}

if !controllerutil.ContainsFinalizer(csv, myFinalizerName) {
// Finalizer has been removed; stop reconciliation as the CSV is being deleted
return nil, false
}

// CSV is being deleted and the finalizer still present; do any clean up
ownerSelector := ownerutil.CSVOwnerSelector(csv)
listOptions := client.ListOptions{
LabelSelector: ownerSelector,
}
deleteOptions := client.DeleteAllOfOptions{
ListOptions: listOptions,
}
// Look for resources owned by this CSV, and delete them.
log.WithValues("selector", ownerSelector).Info("Cleaning up resources after CSV deletion")
var errs []error

err := r.Client.DeleteAllOf(ctx, &rbacv1.ClusterRoleBinding{}, &deleteOptions)
if client.IgnoreNotFound(err) != nil {
log.Error(err, "Deleting ClusterRoleBindings on CSV delete")
errs = append(errs, err)
}

err = r.Client.DeleteAllOf(ctx, &rbacv1.ClusterRole{}, &deleteOptions)
if client.IgnoreNotFound(err) != nil {
log.Error(err, "Deleting ClusterRoles on CSV delete")
errs = append(errs, err)
}

err = r.Client.DeleteAllOf(ctx, &admissionregistrationv1.MutatingWebhookConfiguration{}, &deleteOptions)
if client.IgnoreNotFound(err) != nil {
log.Error(err, "Deleting MutatingWebhookConfigurations on CSV delete")
errs = append(errs, err)
}

err = r.Client.DeleteAllOf(ctx, &admissionregistrationv1.ValidatingWebhookConfiguration{}, &deleteOptions)
if client.IgnoreNotFound(err) != nil {
log.Error(err, "Deleting ValidatingWebhookConfigurations on CSV delete")
errs = append(errs, err)
}

// Make sure things are deleted
crbList := &rbacv1.ClusterRoleBindingList{}
err = r.Client.List(ctx, crbList, &listOptions)
if err != nil {
errs = append(errs, err)
} else if len(crbList.Items) != 0 {
errs = append(errs, fmt.Errorf("waiting for ClusterRoleBindings to delete"))
}

crList := &rbacv1.ClusterRoleList{}
err = r.Client.List(ctx, crList, &listOptions)
if err != nil {
errs = append(errs, err)
} else if len(crList.Items) != 0 {
errs = append(errs, fmt.Errorf("waiting for ClusterRoles to delete"))
}

mwcList := &admissionregistrationv1.MutatingWebhookConfigurationList{}
err = r.Client.List(ctx, mwcList, &listOptions)
if err != nil {
errs = append(errs, err)
} else if len(mwcList.Items) != 0 {
errs = append(errs, fmt.Errorf("waiting for MutatingWebhookConfigurations to delete"))
}

vwcList := &admissionregistrationv1.ValidatingWebhookConfigurationList{}
err = r.Client.List(ctx, vwcList, &listOptions)
if err != nil {
errs = append(errs, err)
} else if len(vwcList.Items) != 0 {
errs = append(errs, fmt.Errorf("waiting for ValidatingWebhookConfigurations to delete"))
}

// Return any errors
if err := utilerrors.NewAggregate(errs); err != nil {
return err, false
}

// If no errors, remove our finalizer from the CSV and update
patch := csv.DeepCopy()
controllerutil.RemoveFinalizer(patch, myFinalizerName)
if err := r.Client.Patch(ctx, patch, client.MergeFrom(csv)); err != nil {
log.Error(err, "Removing finalizer")
return err, false
}

// Stop reconciliation as the csv is being deleted
return nil, false
}
2 changes: 1 addition & 1 deletion test/e2e/installplan_e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2925,7 +2925,7 @@ var _ = Describe("Install Plan", func() {
}

return true
}, pollDuration*2, pollInterval).Should(BeTrue())
}, pollDuration, pollInterval).Should(BeTrue())
By("Cleaning up the test")
})

Expand Down

0 comments on commit 127d7ed

Please sign in to comment.