diff --git a/pkg/operator/internalimageregistry/cleanup_controller.go b/pkg/operator/internalimageregistry/cleanup_controller.go index 355e4c581..59e315e50 100644 --- a/pkg/operator/internalimageregistry/cleanup_controller.go +++ b/pkg/operator/internalimageregistry/cleanup_controller.go @@ -5,6 +5,7 @@ import ( errs "errors" "fmt" "net/http" + "slices" "strings" "time" @@ -64,9 +65,42 @@ func (c *imagePullSecretCleanupController) sync(ctx context.Context, controllerC if imageRegistryEnabled { return nil } + + // once the image registry is disabled we want to wait unil the controller manager + // operand is deployed and stable before clean up the image pull secrets. we don't + // want to compete with the operand that may be acting on the same resources we are. + if progressing, err := ControllerManagerIsProgressing(c.clusterOperatorLister); err != nil { + return err + } else if progressing { + return nil + } + return c.cleanup(ctx) } +// removeLegacyFinalizer removes the legacy finalizer from the image pull +// secret. With the registry removed the controller that was responsible for +// finalizing the secret isn't running anymore. It is safe thus to remove the +// finalizer if we also remove the secret pointed by its +// openshift.io/token-secret.name annotation. +func (c *imagePullSecretCleanupController) removeLegacyFinalizer(ctx context.Context, secret *corev1.Secret) error { + finalizers := slices.DeleteFunc(secret.Finalizers, func(finalizer string) bool { + return finalizer == "openshift.io/legacy-token" + }) + if len(finalizers) == len(secret.Finalizers) { + return nil + } + secret.Finalizers = finalizers + + // the image pull secret might of been deleted via cascade if its + // corresponding API token was deleted, so ignore NotFound error. + _, err := c.kubeClient.CoreV1().Secrets(secret.Namespace).Update(ctx, secret, metav1.UpdateOptions{}) + if err != nil && !errors.IsNotFound(err) { + return fmt.Errorf("unable to remove legacy token secret finalizers for %q (ns=%q): %w", secret.Name, secret.Namespace, err) + } + return nil +} + func (c *imagePullSecretCleanupController) cleanup(ctx context.Context) error { // cleanup service accounts serviceAccounts, err := c.serviceAccountLister.List(labels.Everything()) @@ -89,6 +123,15 @@ func (c *imagePullSecretCleanupController) cleanup(ctx context.Context) error { continue } + if imagePullSecret != nil { + // at this stage the operand responsible for removing the finalizer isn't running + // anymore. we need to remove the finalizer if it is present to avoid blocking + // the deletion we are about to do. + if err := c.removeLegacyFinalizer(ctx, imagePullSecret); err != nil { + return err + } + } + if imagePullSecret != nil && c.imagePullSecretInUse(imagePullSecret) { // managed image pull secret is referenced by a pod, skip for now continue diff --git a/pkg/operator/internalimageregistry/management_state.go b/pkg/operator/internalimageregistry/management_state.go index 811c43298..794e8b2db 100644 --- a/pkg/operator/internalimageregistry/management_state.go +++ b/pkg/operator/internalimageregistry/management_state.go @@ -7,6 +7,53 @@ import ( "k8s.io/klog/v2" ) +// ControllerManagerIsProgressing returns true if the operand for this operator +// is progressing. This function observes the "progressing", the "available", and +// the "degraded" conditions of the "openshift-controller-manager". +func ControllerManagerIsProgressing(clusterOperatorLister configlistersv1.ClusterOperatorLister) (bool, error) { + co, err := clusterOperatorLister.Get("openshift-controller-manager") + if err != nil { + return false, err + } + + if len(co.Status.Conditions) == 0 { + klog.V(4).InfoS("clusteroperators.config.openshift.io/openshift-controller-manager conditions do not yet exist") + return true, nil + } + + missingProgressing, missingAvailable, missingDegraded := true, true, true + for _, cond := range co.Status.Conditions { + if cond.Type == configv1.OperatorProgressing { + missingProgressing = false + if cond.Status != configv1.ConditionFalse { + klog.V(4).InfoS("clusteroperators.config.openshift.io/openshift-controller-manager is progressing") + return true, nil + } + } + if cond.Type == configv1.OperatorAvailable { + missingAvailable = false + if cond.Status != configv1.ConditionTrue { + klog.V(4).InfoS("clusteroperators.config.openshift-controller-manager is not available") + return true, nil + } + } + if cond.Type == configv1.OperatorDegraded { + missingDegraded = false + if cond.Status != configv1.ConditionFalse { + klog.V(4).InfoS("clusteroperators.config.openshift.io/openshift-controller-manager is degraded") + return true, nil + } + } + } + + if missingProgressing || missingAvailable || missingDegraded { + klog.V(4).InfoS("clusteroperators.config.openshift.io/openshift-controller-manager conditions available/progressing/degraded not found") + return true, nil + } + + return false, nil +} + // ImageRegistryIsEnabled returns true if the ImageRegistry capability // is enabled and the internal image registry has not been disabled. func ImageRegistryIsEnabled(clusterVersionLister configlistersv1.ClusterVersionLister, clusterOperatorLister configlistersv1.ClusterOperatorLister) (bool, error) {