diff --git a/pkg/controller/clusterdeployment/clusterdeployment_controller.go b/pkg/controller/clusterdeployment/clusterdeployment_controller.go index 3e39b14c1b7..b839fe1f263 100644 --- a/pkg/controller/clusterdeployment/clusterdeployment_controller.go +++ b/pkg/controller/clusterdeployment/clusterdeployment_controller.go @@ -45,6 +45,7 @@ import ( hiveintv1alpha1 "github.com/openshift/hive/apis/hiveinternal/v1alpha1" "github.com/openshift/hive/pkg/constants" hivemetrics "github.com/openshift/hive/pkg/controller/metrics" + "github.com/openshift/hive/pkg/controller/utils" controllerutils "github.com/openshift/hive/pkg/controller/utils" "github.com/openshift/hive/pkg/imageset" "github.com/openshift/hive/pkg/remoteclient" @@ -527,7 +528,7 @@ func (r *ReconcileClusterDeployment) reconcile(request reconcile.Request, cd *hi cdLog.Debugf("cluster expires at: %s", expiry) if time.Now().After(expiry) { cdLog.WithField("expiry", expiry).Info("cluster has expired, issuing delete") - err := r.Delete(context.TODO(), cd) + err := utils.SafeDelete(r, context.TODO(), cd) if err != nil { cdLog.WithError(err).Log(controllerutils.LogLevel(err), "error deleting expired cluster") } diff --git a/pkg/controller/clusterpool/clusterpool_controller.go b/pkg/controller/clusterpool/clusterpool_controller.go index ffa694f71f5..df8bdbb5d85 100644 --- a/pkg/controller/clusterpool/clusterpool_controller.go +++ b/pkg/controller/clusterpool/clusterpool_controller.go @@ -31,6 +31,7 @@ import ( "github.com/openshift/hive/pkg/clusterresource" "github.com/openshift/hive/pkg/constants" hivemetrics "github.com/openshift/hive/pkg/controller/metrics" + "github.com/openshift/hive/pkg/controller/utils" controllerutils "github.com/openshift/hive/pkg/controller/utils" ) @@ -344,7 +345,7 @@ func (r *ReconcileClusterPool) Reconcile(ctx context.Context, request reconcile. for _, cd := range toRemoveClaimedCDs[:toDel] { cdLog := logger.WithField("cluster", cd.Name) cdLog.Info("deleting cluster deployment for previous claim") - if err := r.Client.Delete(context.Background(), cd); err != nil { + if err := utils.SafeDelete(r, context.Background(), cd); err != nil { cdLog.WithError(err).Error("error deleting cluster deployment") return reconcile.Result{}, err } @@ -652,7 +653,7 @@ func (r *ReconcileClusterPool) deleteExcessClusters( for _, cd := range clustersToDelete { logger := logger.WithField("cluster", cd.Name) logger.Info("deleting cluster deployment") - if err := r.Client.Delete(context.Background(), cd); err != nil { + if err := utils.SafeDelete(r, context.Background(), cd); err != nil { logger.WithError(err).Error("error deleting cluster deployment") return err } @@ -673,7 +674,7 @@ func (r *ReconcileClusterPool) reconcileDeletedPool(pool *hivev1.ClusterPool, lo if cd.DeletionTimestamp != nil { continue } - if err := r.Delete(context.Background(), cd); err != nil { + if err := utils.SafeDelete(r, context.Background(), cd); err != nil { logger.WithError(err).WithField("cluster", cd.Name).Log(controllerutils.LogLevel(err), "could not delete ClusterDeployment") return errors.Wrap(err, "could not delete ClusterDeployment") } diff --git a/pkg/controller/utils/utils.go b/pkg/controller/utils/utils.go index 864f6d1aa08..1b8e6d15683 100644 --- a/pkg/controller/utils/utils.go +++ b/pkg/controller/utils/utils.go @@ -387,3 +387,8 @@ func SetProxyEnvVars(podSpec *corev1.PodSpec, httpProxy, httpsProxy, noProxy str setEnvVarOnContainers(podSpec, "NO_PROXY", noProxy) } } + +func SafeDelete(cl client.Client, ctx context.Context, obj client.Object) error { + rv := obj.GetResourceVersion() + return cl.Delete(ctx, obj, client.Preconditions{ResourceVersion: &rv}) +} diff --git a/pkg/controller/utils/utils_test.go b/pkg/controller/utils/utils_test.go index ff497f07fc8..82e3d6e03c4 100644 --- a/pkg/controller/utils/utils_test.go +++ b/pkg/controller/utils/utils_test.go @@ -1,24 +1,28 @@ package utils import ( + "context" goerrors "errors" "fmt" "os" "testing" "time" + hivev1 "github.com/openshift/hive/apis/hive/v1" hiveassert "github.com/openshift/hive/pkg/test/assert" "github.com/pkg/errors" log "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" "golang.org/x/time/rate" corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/client-go/rest" "k8s.io/client-go/util/flowcontrol" "k8s.io/client-go/util/workqueue" + "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/reconcile" ) @@ -490,3 +494,51 @@ func TestSetProxyEnvVars(t *testing.T) { }) } } + +func TestSafeDelete(t *testing.T) { + scheme := runtime.NewScheme() + hivev1.AddToScheme(scheme) + corev1.AddToScheme(scheme) + + fakeClient := fake.NewFakeClientWithScheme(scheme) + + cp := &hivev1.ClusterPool{ + ObjectMeta: v1.ObjectMeta{ + Name: "foo", + Namespace: "bar", + }, + } + + cp1 := cp.DeepCopy() + t.Log("1) Deletion with matching ResourceVersion") + if err := fakeClient.Create(context.Background(), cp1); err != nil { + t.Errorf("Failed initial creation: %v", err) + } + t.Logf("ResourceVersion after Creation: %s", cp1.ResourceVersion) + + // SafeDelete works if we don't muck with the ResourceVersion + if err := SafeDelete(fakeClient, context.Background(), cp1); err != nil { + t.Errorf("SafeDelete() error: %v", err) + } + + t.Log("2) Deletion with stale ResourceVersion") + cp2 := cp.DeepCopy() + if err := fakeClient.Create(context.Background(), cp2); err != nil { + t.Errorf("Failed creation: %v", err) + } + + cp3 := cp2.DeepCopy() + // Make a change to the created object + cp2.Spec.BaseDomain = "foo.example.com" + if err := fakeClient.Update(context.Background(), cp2); err != nil { + t.Errorf("Failed update: %v", err) + } + assert.NotEqual(t, cp3.ResourceVersion, cp2.ResourceVersion) + t.Logf("ResourceVersion after update: %s", cp2.ResourceVersion) + + // Now try to delete the object using the stale copy + err := SafeDelete(fakeClient, context.Background(), cp3) + assert.True( + t, apierrors.IsConflict(err), + "Expected conflict error deleting stale copy with ResourceVersion %s, but got %s", cp3.ResourceVersion, err) +}