Skip to content

Commit

Permalink
Power off nodes upon deletion
Browse files Browse the repository at this point in the history
Co-authored-by: Sandhya Dasu <sadasu@redhat.com>
  • Loading branch information
honza and Sandhya Dasu committed Dec 19, 2022
1 parent 84a7ea9 commit 2f4593e
Show file tree
Hide file tree
Showing 8 changed files with 39 additions and 9 deletions.
3 changes: 3 additions & 0 deletions apis/metal3.io/v1alpha1/baremetalhost_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ const (
// DetachError is an error condition occurring when the
// controller is unable to detach the host from the provisioner
DetachError ErrorType = "detach error"
// DeleteError is an error condition occurring when the controller
// is unable to delete the host.
DeleteError ErrorType = "delete error"
)

// ProvisioningState defines the states the provisioner will report
Expand Down
29 changes: 28 additions & 1 deletion controllers/metal3.io/baremetalhost_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ const (
hardwareDetailsAnnotation = inspectAnnotationPrefix + "/hardwaredetails"
clarifySoftPoweroffFailure = "Continuing with hard poweroff after soft poweroff fails. More details: "
hardwareDataFinalizer = metal3v1alpha1.BareMetalHostFinalizer + "/hardwareData"
maxPowerOffRetryCount = 3
)

// BareMetalHostReconciler reconciles a BareMetalHost object
Expand Down Expand Up @@ -321,6 +322,7 @@ func recordActionFailure(info *reconcileInfo, errorType metal3v1alpha1.ErrorType
metal3v1alpha1.InspectionError: "InspectionError",
metal3v1alpha1.ProvisioningError: "ProvisioningError",
metal3v1alpha1.PowerManagementError: "PowerManagementError",
metal3v1alpha1.DeleteError: "DeleteError",
}[errorType]

counter := actionFailureCounters.WithLabelValues(eventType)
Expand Down Expand Up @@ -494,10 +496,35 @@ func (r *BareMetalHostReconciler) actionDeleting(prov provisioner.Provisioner, i
return deleteComplete{}
}

provResult, err := prov.Delete()
// Try to power off at most `maxPowerOffRetryCount` times; after that, give up and delete the host anyway
if info.host.Status.ErrorType != metal3v1alpha1.PowerManagementError || (info.host.Status.ErrorType == metal3v1alpha1.PowerManagementError && info.host.Status.ErrorCount < maxPowerOffRetryCount) {
info.log.Info("host ready to be powered off")
provResult, err := prov.PowerOff(
metal3v1alpha1.RebootModeHard,
info.host.Status.ErrorType == metal3v1alpha1.PowerManagementError)

if err != nil {
return actionError{errors.Wrap(err, "failed to power off")}
}

if provResult.ErrorMessage != "" {
return recordActionFailure(info, metal3v1alpha1.PowerManagementError, provResult.ErrorMessage)
}

if provResult.Dirty {
return actionContinue{provResult.RequeueAfter}
}
}

provResult, err := prov.Delete(info.host.Status.ErrorType == metal3v1alpha1.DeleteError)
if err != nil {
return actionError{errors.Wrap(err, "failed to delete")}
}

if provResult.ErrorMessage != "" {
return recordActionFailure(info, metal3v1alpha1.DeleteError, provResult.ErrorMessage)
}

if provResult.Dirty {
return actionContinue{provResult.RequeueAfter}
}
Expand Down
2 changes: 1 addition & 1 deletion controllers/metal3.io/host_state_machine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1287,7 +1287,7 @@ func (m *mockProvisioner) Deprovision(force bool) (result provisioner.Result, er
return m.getNextResultByMethod("Deprovision"), err
}

func (m *mockProvisioner) Delete() (result provisioner.Result, err error) {
func (m *mockProvisioner) Delete(force bool) (result provisioner.Result, err error) {
return m.getNextResultByMethod("Delete"), err
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/provisioner/demo/demo.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ func (p *demoProvisioner) Deprovision(force bool) (result provisioner.Result, er
// Delete removes the host from the provisioning system. It may be
// called multiple times, and should return true for its dirty flag
// until the deletion operation is completed.
func (p *demoProvisioner) Delete() (result provisioner.Result, err error) {
func (p *demoProvisioner) Delete(force bool) (result provisioner.Result, err error) {
p.log.Info("deleting host")
return result, nil
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/provisioner/fixture/fixture.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ func (p *fixtureProvisioner) Deprovision(force bool) (result provisioner.Result,
// Delete removes the host from the provisioning system. It may be
// called multiple times, and should return true for its dirty flag
// until the deletion operation is completed.
func (p *fixtureProvisioner) Delete() (result provisioner.Result, err error) {
func (p *fixtureProvisioner) Delete(force bool) (result provisioner.Result, err error) {
p.log.Info("deleting host")

if !p.state.Deleted {
Expand All @@ -290,7 +290,7 @@ func (p *fixtureProvisioner) Delete() (result provisioner.Result, err error) {
// and should return true for its dirty flag until the
// deletion operation is completed.
func (p *fixtureProvisioner) Detach() (result provisioner.Result, err error) {
return p.Delete()
return p.Delete(true)
}

// PowerOn ensures the server is powered on independently of any image
Expand Down
2 changes: 1 addition & 1 deletion pkg/provisioner/ironic/delete_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ func deleteTest(t *testing.T, detach bool) {
if detach {
result, err = prov.Detach()
} else {
result, err = prov.Delete()
result, err = prov.Delete(true)
}

assert.Equal(t, tc.expectedDirty, result.Dirty)
Expand Down
4 changes: 2 additions & 2 deletions pkg/provisioner/ironic/ironic.go
Original file line number Diff line number Diff line change
Expand Up @@ -1707,7 +1707,7 @@ func (p *ironicProvisioner) Deprovision(force bool) (result provisioner.Result,
// Delete removes the host from the provisioning system. It may be
// called multiple times, and should return true for its dirty flag
// until the deletion operation is completed.
func (p *ironicProvisioner) Delete() (result provisioner.Result, err error) {
func (p *ironicProvisioner) Delete(force bool) (result provisioner.Result, err error) {
ironicNode, err := p.getNode()
if err != nil {
if errors.Is(err, provisioner.ErrNeedsRegistration) {
Expand Down Expand Up @@ -1777,7 +1777,7 @@ func (p *ironicProvisioner) Delete() (result provisioner.Result, err error) {
// deletion operation is completed.
func (p *ironicProvisioner) Detach() (result provisioner.Result, err error) {
// Currently the same behavior as Delete()
return p.Delete()
return p.Delete(true)
}

// softPowerOffUnsupportedError is returned when the BMC does not
Expand Down
2 changes: 1 addition & 1 deletion pkg/provisioner/provisioner.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ type Provisioner interface {
// Delete removes the host from the provisioning system. It may be
// called multiple times, and should return true for its dirty
// flag until the deletion operation is completed.
Delete() (result Result, err error)
Delete(force bool) (result Result, err error)

// Detach removes the host from the provisioning system.
// Similar to Delete, but ensures non-interruptive behavior
Expand Down

0 comments on commit 2f4593e

Please sign in to comment.