Skip to content

Commit

Permalink
Delete the lock created by CSO 4.6.
Browse files Browse the repository at this point in the history
In a downgrade scenario from 4.6 to 4.5, the leader
election ConfigMap is created by CSO 4.6 (library-go)
and it doesn't have any `ownerReference` set.

However, CSO 4.5 uses `operator-sdk` with
leader-for-life election approach, and it expects
that the ConfigMap either doesn't exist or
exists but has an `ownerReference` set (in order
to know whether the lock belongs to it or not).

Since it doesn't find any, it tries to create one,
but it gets back an error stating that the object
already exists. This goes on forever and CSO 4.5
never becomes the leader.

This patch identifies if the lock was created
by CSO 4.6 and, if so, deletes it so that CSO 4.5
can become the leader using the leader-for-life model.
  • Loading branch information
bertinatto committed Sep 24, 2020
1 parent 52cc7bf commit 2d1e41a
Showing 1 changed file with 63 additions and 4 deletions.
67 changes: 63 additions & 4 deletions cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,14 @@ import (
configv1 "github.com/openshift/api/config/v1"
"github.com/openshift/cluster-storage-operator/pkg/apis"
"github.com/openshift/cluster-storage-operator/pkg/controller"
"github.com/operator-framework/operator-sdk/pkg/k8sutil"
"github.com/operator-framework/operator-sdk/pkg/leader"
sdkVersion "github.com/operator-framework/operator-sdk/version"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/config"
"sigs.k8s.io/controller-runtime/pkg/manager"
logf "sigs.k8s.io/controller-runtime/pkg/runtime/log"
Expand All @@ -27,6 +32,51 @@ func printVersion() {
log.Info(fmt.Sprintf("Version of operator-sdk: %v", sdkVersion.Version))
}

// tryDeleteIncompatibleLock tries to identify a leader-election ConfigMap created by
// CSO 4.6 and delete it.
//
// In a downgrade scenario from 4.6 to 4.5, the leader election ConfigMap is created by
// CSO 4.6 (library-go) and it doesn't have any ownerReference set. However, CSO 4.5 uses
// the leader-for-life election model, and it expects that the ConfigMap either doesn't
// exist or exists but has an ownerReference set (in order to know whether the lock belongs
// to it or not). Without this code, CSO 4.5 will perpetually fail to acquire the lock.
// More information at https://bugzilla.redhat.com/show_bug.cgi?id=1877316
//
// The ConfigMap named lockName is looked up in the operator namespace using cl.
// The function returns nil when the lock does not exist, when it has at least one
// ownerReference (and is therefore left untouched), or when it was deleted
// successfully. Any other error from resolving the namespace, reading the ConfigMap
// or deleting it is returned unchanged to the caller.
func tryDeleteIncompatibleLock(cl client.Client, lockName string) error {
	namespace, err := k8sutil.GetOperatorNamespace()
	if err != nil {
		return err
	}

	key := client.ObjectKey{Namespace: namespace, Name: lockName}
	cm := &corev1.ConfigMap{
		TypeMeta: v1.TypeMeta{
			APIVersion: "v1",
			Kind:       "ConfigMap",
		},
	}

	err = cl.Get(context.TODO(), key, cm)
	if apierrors.IsNotFound(err) {
		// No lock at all: nothing to clean up.
		return nil
	}
	if err != nil {
		return err
	}

	// If the ConfigMap has metadata.ownerReferences, then it was
	// likely created by CSO 4.5. In this case, we don't want to
	// delete the lock, otherwise we could end up with multiple
	// operators running at the same time. A single ownerReference
	// is enough to mark the lock as owned, so the check must be
	// "> 0" rather than "> 1".
	if len(cm.GetOwnerReferences()) > 0 {
		return nil
	}

	log.Info("Found ConfigMap lock without metadata.ownerReferences, deleting")
	err = cl.Delete(context.TODO(), cm)
	if apierrors.IsNotFound(err) {
		// The lock disappeared between Get and Delete; the goal is achieved either way.
		return nil
	}
	return err
}

func main() {
flag.Parse()

Expand All @@ -47,15 +97,24 @@ func main() {
os.Exit(1)
}

// Become the leader before proceeding
err = leader.Become(context.TODO(), "cluster-storage-operator-lock")
// Create a new Cmd to provide shared dependencies and start components
mgr, err := manager.New(cfg, manager.Options{Namespace: namespace})
if err != nil {
log.Error(err, "")
os.Exit(1)
}

// Create a new Cmd to provide shared dependencies and start components
mgr, err := manager.New(cfg, manager.Options{Namespace: namespace})
// Delete incompatible ConfigMap lock if it exists
lockName := "cluster-storage-operator-lock"
err = tryDeleteIncompatibleLock(mgr.GetClient(), lockName)
if err != nil {
// We don't exit if an error happens, but we let the operator
// try to become the leader below. If it doesn't succeed, then we exit
log.Error(err, "error trying to delete incompatible ConfigMap lock")
}

// Become the leader before proceeding
err = leader.Become(context.TODO(), lockName)
if err != nil {
log.Error(err, "")
os.Exit(1)
Expand Down

0 comments on commit 2d1e41a

Please sign in to comment.