Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion components/coordinate/coordinate.go
Original file line number Diff line number Diff line change
Expand Up @@ -1033,7 +1033,10 @@ func (c *Coordinator) Start(ctx context.Context) error {
eac,
controller.AdaptReconcileController[addon_v1alpha.AddonAssociation](addonController),
time.Minute,
1,
// Multiple workers so a long-running provisioning saga for one
// association does not block reconciliation of others. Same-entity
// concurrency is already prevented by ReconcileController.inFlight.
4,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason to go to 4 rather than 2 here? Two workers already solve the stated problem (one slow saga can't starve the queue), and each provisioning saga does a fair bit of entity store work (config versions, app versions, active version patches). Starting conservative and bumping later if someone actually hits the limit seems like the lower-risk move. Don't feel strongly tho, totally shippable as is!

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We already done 3 elsewhere and if these are slower, due to startup, felt like it was a good fit to reach for 4 for now.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SGTM!

)
c.cm.AddController(addonReconciler)
}
Expand Down
14 changes: 14 additions & 0 deletions controllers/addon/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,20 @@ func (c *Controller) Reconcile(ctx context.Context, assoc *addon_v1alpha.AddonAs
func (c *Controller) provision(ctx context.Context, assoc *addon_v1alpha.AddonAssociation, meta *entity.Meta) error {
c.log.Info("provisioning addon", "association", assoc.ID, "addon", assoc.Addon, "variant", assoc.Variant)

// Re-read the association to guard against stale events (e.g. on startup
// resync, the reconcile event may carry an older revision than what's
// now in the store). If the user has already marked this association for
// deprovisioning, skip; the subsequent deprovisioning event will handle it.
var current addon_v1alpha.AddonAssociation
if err := c.ec.GetById(ctx, assoc.ID, &current); err != nil {
return fmt.Errorf("re-reading association: %w", err)
}
if current.Status != "pending" {
c.log.Info("association no longer pending, skipping provision",
"association", assoc.ID, "status", current.Status)
return nil
}

// Step 1: Set status to provisioning
if err := meta.Update((&addon_v1alpha.AddonAssociation{Status: "provisioning"}).Encode()); err != nil {
return fmt.Errorf("setting status to provisioning: %w", err)
Expand Down
47 changes: 47 additions & 0 deletions controllers/addon/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,53 @@ func TestProvisionCompensatesOnPostProvisionFailure(t *testing.T) {
assert.True(t, provider.deprovisionCalled, "Deprovision should have been called as compensation after post-provision failure")
}

// TestProvisionSkipsWhenAssociationNoLongerPending verifies the pre-flight
// re-read: if a stale Reconcile event routes through provision() but the
// association has since moved to "deprovisioning" (e.g. on startup resync
// after the user ran `addon destroy`), provision() must be a no-op so the
// subsequent deprovisioning event can run.
func TestProvisionSkipsWhenAssociationNoLongerPending(t *testing.T) {
ctx, ctrl, ec, provider := setupControllerTest(t)

appID, err := ec.Create(ctx, "myapp", &core_v1alpha.App{})
require.NoError(t, err)

addonID, err := ec.Create(ctx, "miren-postgresql", &addon_v1alpha.Addon{
Name: "miren-postgresql",
})
require.NoError(t, err)

assocID, err := ec.Create(ctx, "test-assoc", &addon_v1alpha.AddonAssociation{
App: appID,
Addon: addonID,
Variant: "small",
Status: "pending",
})
require.NoError(t, err)

// Capture a stale "pending" view of the association.
var stale addon_v1alpha.AddonAssociation
meta, err := getMeta(ctx, ec, assocID, &stale)
require.NoError(t, err)

// In the meantime, the user destroys the addon and the status flips.
require.NoError(t, ec.Patch(ctx, assocID, 0,
(&addon_v1alpha.AddonAssociation{Status: "deprovisioning"}).Encode()...,
))

// Now reconcile the stale event. provision() should re-read, see the
// association is no longer "pending", and skip without calling the provider.
require.NoError(t, ctrl.Reconcile(ctx, &stale, meta))

assert.False(t, provider.provisionCalled, "Provision should NOT be called when association is no longer pending")

// The on-disk status should remain "deprovisioning" — provision() must not
// have overwritten it.
var current addon_v1alpha.AddonAssociation
require.NoError(t, ec.GetById(ctx, assocID, &current))
assert.Equal(t, "deprovisioning", current.Status)
}

func TestDeprovisionCompletesWhenAppDeleted(t *testing.T) {
ctx, ctrl, ec, provider := setupControllerTest(t)

Expand Down
Loading