From 86c960a5f1618e77a70cc7db9969eb2ac3ff161e Mon Sep 17 00:00:00 2001 From: Evan Phoenix Date: Mon, 13 Apr 2026 14:42:12 -0700 Subject: [PATCH] Unblock addon deprovisioning when provisioning saga is in-flight MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The addon reconciler ran with a single worker, so a long-running provisioning saga (e.g. valkey waiting on sandbox pool readiness) blocked every other addon event — including user-triggered deprovisioning. Bumps the worker count to 4 so independent associations reconcile in parallel (same-entity serialization is already guaranteed by the framework's in-flight map). Also adds a pre-flight re-read in provision() so a stale "pending" event (e.g. from startup resync) does not re-trigger provisioning on an association the user has already destroyed. Fixes MIR-970. --- components/coordinate/coordinate.go | 5 ++- controllers/addon/controller.go | 14 +++++++++ controllers/addon/controller_test.go | 47 ++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 1 deletion(-) diff --git a/components/coordinate/coordinate.go b/components/coordinate/coordinate.go index cb70068b..6c42946b 100644 --- a/components/coordinate/coordinate.go +++ b/components/coordinate/coordinate.go @@ -1033,7 +1033,10 @@ func (c *Coordinator) Start(ctx context.Context) error { eac, controller.AdaptReconcileController[addon_v1alpha.AddonAssociation](addonController), time.Minute, - 1, + // Multiple workers so a long-running provisioning saga for one + // association does not block reconciliation of others. Same-entity + // concurrency is already prevented by ReconcileController.inFlight. + 4, ) c.cm.AddController(addonReconciler) } diff --git a/controllers/addon/controller.go b/controllers/addon/controller.go index 4d893832..95d8cf48 100644 --- a/controllers/addon/controller.go +++ b/controllers/addon/controller.go @@ -64,6 +64,20 @@ func (c *Controller) Reconcile(ctx context.Context, assoc *addon_v1alpha.AddonAs func (c *Controller) provision(ctx context.Context, assoc *addon_v1alpha.AddonAssociation, meta *entity.Meta) error { c.log.Info("provisioning addon", "association", assoc.ID, "addon", assoc.Addon, "variant", assoc.Variant) + // Re-read the association to guard against stale events (e.g. on startup + // resync, the reconcile event may carry an older revision than what's + // now in the store). If the user has already marked this association for + // deprovisioning, skip; the subsequent deprovisioning event will handle it. + var current addon_v1alpha.AddonAssociation + if err := c.ec.GetById(ctx, assoc.ID, ¤t); err != nil { + return fmt.Errorf("re-reading association: %w", err) + } + if current.Status != "pending" { + c.log.Info("association no longer pending, skipping provision", + "association", assoc.ID, "status", current.Status) + return nil + } + // Step 1: Set status to provisioning if err := meta.Update((&addon_v1alpha.AddonAssociation{Status: "provisioning"}).Encode()); err != nil { return fmt.Errorf("setting status to provisioning: %w", err) diff --git a/controllers/addon/controller_test.go b/controllers/addon/controller_test.go index b46f743a..03734c5d 100644 --- a/controllers/addon/controller_test.go +++ b/controllers/addon/controller_test.go @@ -246,6 +246,53 @@ func TestProvisionCompensatesOnPostProvisionFailure(t *testing.T) { assert.True(t, provider.deprovisionCalled, "Deprovision should have been called as compensation after post-provision failure") } +// TestProvisionSkipsWhenAssociationNoLongerPending verifies the pre-flight +// re-read: if a stale Reconcile event routes through provision() but the +// association has since moved to "deprovisioning" (e.g. on startup resync +// after the user ran `addon destroy`), provision() must be a no-op so the +// subsequent deprovisioning event can run. +func TestProvisionSkipsWhenAssociationNoLongerPending(t *testing.T) { + ctx, ctrl, ec, provider := setupControllerTest(t) + + appID, err := ec.Create(ctx, "myapp", &core_v1alpha.App{}) + require.NoError(t, err) + + addonID, err := ec.Create(ctx, "miren-postgresql", &addon_v1alpha.Addon{ + Name: "miren-postgresql", + }) + require.NoError(t, err) + + assocID, err := ec.Create(ctx, "test-assoc", &addon_v1alpha.AddonAssociation{ + App: appID, + Addon: addonID, + Variant: "small", + Status: "pending", + }) + require.NoError(t, err) + + // Capture a stale "pending" view of the association. + var stale addon_v1alpha.AddonAssociation + meta, err := getMeta(ctx, ec, assocID, &stale) + require.NoError(t, err) + + // In the meantime, the user destroys the addon and the status flips. + require.NoError(t, ec.Patch(ctx, assocID, 0, + (&addon_v1alpha.AddonAssociation{Status: "deprovisioning"}).Encode()..., + )) + + // Now reconcile the stale event. provision() should re-read, see the + // association is no longer "pending", and skip without calling the provider. + require.NoError(t, ctrl.Reconcile(ctx, &stale, meta)) + + assert.False(t, provider.provisionCalled, "Provision should NOT be called when association is no longer pending") + + // The on-disk status should remain "deprovisioning" — provision() must not + // have overwritten it. + var current addon_v1alpha.AddonAssociation + require.NoError(t, ec.GetById(ctx, assocID, ¤t)) + assert.Equal(t, "deprovisioning", current.Status) +} + func TestDeprovisionCompletesWhenAppDeleted(t *testing.T) { ctx, ctrl, ec, provider := setupControllerTest(t)