Skip to content
This repository has been archived by the owner on May 6, 2022. It is now read-only.

handle instance deletion that occurs during async provisioning or async update (#1587) #1708

32 changes: 32 additions & 0 deletions pkg/controller/controller_instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -1434,8 +1434,16 @@ func (c *controller) processProvisionSuccess(instance *v1beta1.ServiceInstance,
setServiceInstanceDashboardURL(instance, dashboardURL)
setServiceInstanceCondition(instance, v1beta1.ServiceInstanceConditionReady, v1beta1.ConditionTrue, successProvisionReason, successProvisionMessage)
instance.Status.ExternalProperties = instance.Status.InProgressProperties
currentReconciledGeneration := instance.Status.ReconciledGeneration
clearServiceInstanceCurrentOperation(instance)

if instance.DeletionTimestamp != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is a fine workaround for now but I do think that we should consider, as a follow-up to this, storing the generation that the in-progress operation is for, so that when we complete an operation we can set the reconciled generation correctly to the generation that started the operation.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed, we should store a correct reconciledGeneration 😀

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jboyd01 leave a TODO then?

// A request to delete the Instance was received during provisioning, don't bump
// ReconciledGeneration as that will prevent processing the delete.
glog.V(4).Infof("Not updating ReconciledGeneration after instance provisioning because there is a deletion pending.")
instance.Status.ReconciledGeneration = currentReconciledGeneration
}

if _, err := c.updateServiceInstanceStatus(instance); err != nil {
return err
}
Expand All @@ -1447,6 +1455,7 @@ func (c *controller) processProvisionSuccess(instance *v1beta1.ServiceInstance,
// processProvisionFailure handles the logging and updating of a
// ServiceInstance that hit a terminal failure during provision reconciliation.
func (c *controller) processProvisionFailure(instance *v1beta1.ServiceInstance, readyCond, failedCond *v1beta1.ServiceInstanceCondition, shouldMitigateOrphan bool) error {
currentReconciledGeneration := instance.Status.ReconciledGeneration
if failedCond == nil {
return fmt.Errorf("failedCond must not be nil")
}
Expand Down Expand Up @@ -1474,6 +1483,13 @@ func (c *controller) processProvisionFailure(instance *v1beta1.ServiceInstance,
err = fmt.Errorf(failedCond.Message)
} else {
clearServiceInstanceCurrentOperation(instance)

if instance.DeletionTimestamp != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could you extract this code to a separate method instead of copy-pasting? it will make a cleanup in a follow-up PR much easier.

// A request to delete the Instance was received during provisioning, don't bump
// ReconciledGeneration as that will prevent processing the delete.
glog.V(4).Infof("Not updating ReconciledGeneration after instance provisioning failure because there is a deletion pending.")
instance.Status.ReconciledGeneration = currentReconciledGeneration
}
}

if _, err := c.updateServiceInstanceStatus(instance); err != nil {
Expand Down Expand Up @@ -1505,8 +1521,16 @@ func (c *controller) processProvisionAsyncResponse(instance *v1beta1.ServiceInst
func (c *controller) processUpdateServiceInstanceSuccess(instance *v1beta1.ServiceInstance) error {
setServiceInstanceCondition(instance, v1beta1.ServiceInstanceConditionReady, v1beta1.ConditionTrue, successUpdateInstanceReason, successUpdateInstanceMessage)
instance.Status.ExternalProperties = instance.Status.InProgressProperties
currentReconciledGeneration := instance.Status.ReconciledGeneration
clearServiceInstanceCurrentOperation(instance)

if instance.DeletionTimestamp != nil {
// A request to delete the Instance was received during async update, don't bump
// ReconciledGeneration as that will prevent processing the update
glog.V(4).Infof("Not updating ReconciledGeneration after async update because there is a deletion pending.")
instance.Status.ReconciledGeneration = currentReconciledGeneration
}

if _, err := c.updateServiceInstanceStatus(instance); err != nil {
return err
}
Expand All @@ -1519,10 +1543,18 @@ func (c *controller) processUpdateServiceInstanceSuccess(instance *v1beta1.Servi
// ServiceInstance that hit a terminal failure during update reconciliation.
func (c *controller) processUpdateServiceInstanceFailure(instance *v1beta1.ServiceInstance, readyCond *v1beta1.ServiceInstanceCondition) error {
c.recorder.Event(instance, corev1.EventTypeWarning, readyCond.Reason, readyCond.Message)
currentReconciledGeneration := instance.Status.ReconciledGeneration

setServiceInstanceCondition(instance, v1beta1.ServiceInstanceConditionReady, readyCond.Status, readyCond.Reason, readyCond.Message)
clearServiceInstanceCurrentOperation(instance)

if instance.DeletionTimestamp != nil {
// A request to delete the Instance was received during async update, don't bump
// ReconciledGeneration as that will prevent processing the update
glog.V(4).Infof("Not updating ReconciledGeneration after async update failure because there is a deletion pending.")
instance.Status.ReconciledGeneration = currentReconciledGeneration
}

if _, err := c.updateServiceInstanceStatus(instance); err != nil {
return err
}
Expand Down
175 changes: 175 additions & 0 deletions test/integration/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,181 @@ func TestAsyncProvisionWithMultiplePolls(t *testing.T) {
})
}

// TestServiceInstanceDeleteWithAsyncUpdateInProgress tests that you can delete
// an instance during an async update. That is, if you request a delete during
// an instance update, the instance will be deleted when the update completes
// regardless of success or failure.
func TestServiceInstanceDeleteWithAsyncUpdateInProgress(t *testing.T) {

cases := []struct {
name string
updateSucceeds bool
}{
{
name: "update succeeds",
updateSucceeds: true,
},
{
name: "update fails",
updateSucceeds: false,
},
}

for _, tc := range cases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
done := make(chan bool)

ct := controllerTest{
t: t,
broker: getTestBroker(),
instance: getTestInstance(),
skipVerifyingInstanceSuccess: false,
setup: func(ct *controllerTest) {
ct.osbClient.UpdateInstanceReaction.(*fakeosb.UpdateInstanceReaction).Response.Async = true
ct.osbClient.PollLastOperationReaction = fakeosb.DynamicPollLastOperationReaction(
func(_ *osb.LastOperationRequest) (*osb.LastOperationResponse, error) {
state := osb.StateInProgress
select {
// nonblocking check for a message to finish async update, return inProgress otherwise
case <-done:
if tc.updateSucceeds {
state = osb.StateSucceeded
} else {
state = osb.StateFailed
}
default:
}
return &osb.LastOperationResponse{State: state}, nil
})
},
}
ct.run(func(ct *controllerTest) {

if err := util.WaitForInstanceCondition(ct.client, ct.instance.Namespace, ct.instance.Name,
v1beta1.ServiceInstanceCondition{
Type: v1beta1.ServiceInstanceConditionReady,
Status: v1beta1.ConditionTrue,
Reason: "ProvisionedSuccessfully",
}); err != nil {
t.Fatalf("error waiting for instance to be ready: %v", err)
}

// add a parameter to the instance for the Update()
ct.instance.Spec.Parameters = convertParametersIntoRawExtension(t,
map[string]interface{}{
"param-key": "new-param-value",
})

_, err := ct.client.ServiceInstances(testNamespace).Update(ct.instance)
if err != nil {
t.Fatalf("error updating instance: %v", err)
}

if err := util.WaitForInstanceCondition(ct.client, ct.instance.Namespace, ct.instance.Name,
v1beta1.ServiceInstanceCondition{
Type: v1beta1.ServiceInstanceConditionReady,
Status: v1beta1.ConditionFalse,
Reason: "UpdatingInstance",
}); err != nil {
t.Fatalf("error waiting for instance to be updating asynchronously: %v", err)
}

if err := ct.client.ServiceInstances(ct.instance.Namespace).Delete(ct.instance.Name, &metav1.DeleteOptions{}); err != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe there's a race here between the update completing and the deletion timestamp getting set. Instead of basing the response of the last operation on the number of polls that occurred, I think we should use a boolean (shouldFinishPolling?) that we toggle from false to true after issuing the delete request.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I concur with @kibbles-n-bytes on this. So that it's clear, the same race exists in the other test.

t.Fatalf("failed to delete instance: %v", err)
}

// notify the thread handling DynamicPollLastOperationReaction that it can end the async op
done <- true

if err := util.WaitForInstanceToNotExist(ct.client, ct.instance.Namespace, ct.instance.Name); err != nil {
t.Fatalf("error waiting for instance to not exist: %v", err)
}

// We deleted the instance above, clear it so test cleanup doesn't fail
ct.instance = nil
})
})
}
}

// TestServiceInstanceDeleteWithAsyncProvisionInProgress tests that you can
// delete an instance during an async provision. Verify the instance is deleted
// when the provisioning completes regardless of success or failure.
func TestServiceInstanceDeleteWithAsyncProvisionInProgress(t *testing.T) {
cases := []struct {
name string
provisionSucceeds bool
}{
{
name: "provision succeeds",
provisionSucceeds: true,
},
{
name: "provision fails",
provisionSucceeds: false,
},
}

for _, tc := range cases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
done := make(chan bool)

ct := controllerTest{
t: t,
broker: getTestBroker(),
instance: getTestInstance(),
skipVerifyingInstanceSuccess: true,
setup: func(ct *controllerTest) {
ct.osbClient.ProvisionReaction.(*fakeosb.ProvisionReaction).Response.Async = true
ct.osbClient.PollLastOperationReaction = fakeosb.DynamicPollLastOperationReaction(
func(_ *osb.LastOperationRequest) (*osb.LastOperationResponse, error) {
state := osb.StateInProgress
select {
// nonblocking check for a message to finish async provisioning, return inProgress otherwise
case <-done:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There might still be a race here. If a reconciliation starts, and then the deletion timestamp is set, we'd pull true out of done but then fail to update the instance (as our resource is out of date). The next reconciliation loop, we'd then be back to the channel being empty.

I think a better gauge would be if the channel is closed or not.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kibbles-n-bytes, are you ok with instead moving the declaration and initialization of state := osb.StateInProgress up to the top of the Run() ? This way reading from the channel toggles us to a done state and we stay there for any additional polls.

Copy link
Contributor

@staebler staebler Feb 14, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about using an atomic int instead of a channel at all? I think that makes it clearer what is going on from a readability standpoint.

The reaction function reads the atomic int. If the value is 0, then the state is in-progress. Otherwise, the state is succeeded or failed. The run function sets the atomic int to 1 after making the delete.

if tc.provisionSucceeds {
state = osb.StateSucceeded
} else {
state = osb.StateFailed
}
default:
}
return &osb.LastOperationResponse{State: state}, nil
})
},
}
ct.run(func(ct *controllerTest) {
if err := util.WaitForInstanceCondition(ct.client, ct.instance.Namespace, ct.instance.Name,
v1beta1.ServiceInstanceCondition{
Type: v1beta1.ServiceInstanceConditionReady,
Status: v1beta1.ConditionFalse,
Reason: "Provisioning",
}); err != nil {
t.Fatalf("error waiting for instance to be provisioning asynchronously: %v", err)
}

if err := ct.client.ServiceInstances(ct.instance.Namespace).Delete(ct.instance.Name, &metav1.DeleteOptions{}); err != nil {
t.Fatalf("failed to delete instance: %v", err)
}

// notify the thread handling DynamicPollLastOperationReaction that it can end the async op
done <- true

if err := util.WaitForInstanceToNotExist(ct.client, ct.instance.Namespace, ct.instance.Name); err != nil {
t.Fatalf("error waiting for instance to not exist: %v", err)
}

// We deleted the instance above, clear it so test cleanup doesn't fail
ct.instance = nil
})
})
}
}

func getUpdateInstanceResponseByPollCountReactions(numOfResponses int, stateProgressions []fakeosb.UpdateInstanceReaction) fakeosb.DynamicUpdateInstanceReaction {
numberOfPolls := 0
numberOfStates := len(stateProgressions)
Expand Down