Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

worker/undertaker: don't ignore all errors #7761

Merged
merged 1 commit into from
Aug 18, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion api/controller/legacy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ func (s *legacySuite) TestDestroyController(c *gc.C) {
sysManager := s.OpenAPI(c)
defer sysManager.Close()
err := sysManager.DestroyController(false)
c.Assert(err, gc.ErrorMatches, `failed to destroy model: hosting 1 other models \(controller has hosted models\)`)
c.Assert(err, gc.ErrorMatches, `failed to destroy model: hosting 1 other model \(controller has hosted models\)`)
}

func (s *legacySuite) TestListBlockedModels(c *gc.C) {
Expand Down
4 changes: 4 additions & 0 deletions apiserver/common/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@ func ServerError(err error) *params.Error {
code = params.CodeHasAssignedUnits
case state.IsHasHostedModelsError(err):
code = params.CodeHasHostedModels
case state.IsModelNotEmptyError(err):
code = params.CodeModelNotEmpty
case isNoAddressSetError(err):
code = params.CodeNoAddressSet
case errors.IsNotProvisioned(err):
Expand Down Expand Up @@ -308,6 +310,8 @@ func RestoreError(err error) error {
return err
case params.IsCodeHasHostedModels(err):
return err
case params.IsCodeModelNotEmpty(err):
return err
case params.IsCodeNoAddressSet(err):
// TODO(ericsnow) Handle isNoAddressSetError here.
// ...by parsing msg?
Expand Down
6 changes: 4 additions & 2 deletions apiserver/common/modeldestroy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ func (s *destroyTwoModelsSuite) TestDestroyControllerAfterNonControllerIsDestroy
otherFactory.MakeMachineNested(c, m.Id(), nil)

err := common.DestroyModel(s.modelManager, s.State.ModelTag())
c.Assert(err, gc.ErrorMatches, "failed to destroy model: hosting 1 other models")
c.Assert(err, gc.ErrorMatches, "failed to destroy model: hosting 1 other model")

needsCleanup, err := s.State.NeedsCleanup()
c.Assert(err, jc.ErrorIsNil)
Expand All @@ -319,7 +319,7 @@ func (s *destroyTwoModelsSuite) TestDestroyControllerAfterNonControllerIsDestroy
// The hosted model is Dying, not Dead; we cannot destroy
// the controller model until all hosted models are Dead.
err = common.DestroyModel(s.modelManager, s.State.ModelTag())
c.Assert(err, gc.ErrorMatches, "failed to destroy model: hosting 1 other models")
c.Assert(err, gc.ErrorMatches, "failed to destroy model: hosting 1 other model")

// Continue to take the hosted model down so we can
// destroy the controller model.
Expand All @@ -333,6 +333,8 @@ func (s *destroyTwoModelsSuite) TestDestroyControllerAfterNonControllerIsDestroy
otherEnv, err := s.otherState.Model()
c.Assert(err, jc.ErrorIsNil)
c.Assert(otherEnv.Life(), gc.Equals, state.Dead)
err = s.otherState.RemoveAllModelDocs()
c.Assert(err, jc.ErrorIsNil)

env, err := s.State.Model()
c.Assert(err, jc.ErrorIsNil)
Expand Down
5 changes: 5 additions & 0 deletions apiserver/params/apierror.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ const (
CodeDead = "dead"
CodeHasAssignedUnits = "machine has assigned units"
CodeHasHostedModels = "controller has hosted models"
CodeModelNotEmpty = "model not empty"
CodeMachineHasAttachedStorage = "machine has attached storage"
CodeNotProvisioned = "not provisioned"
CodeNoAddressSet = "no address set"
Expand Down Expand Up @@ -184,6 +185,10 @@ func IsCodeHasHostedModels(err error) bool {
return ErrCode(err) == CodeHasHostedModels
}

func IsCodeModelNotEmpty(err error) bool {
return ErrCode(err) == CodeModelNotEmpty
}

func IsCodeMachineHasAttachedStorage(err error) bool {
return ErrCode(err) == CodeMachineHasAttachedStorage
}
Expand Down
2 changes: 1 addition & 1 deletion featuretests/api_undertaker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ func (s *undertakerSuite) TestStateProcessDyingEnviron(c *gc.C) {
c.Assert(env.Life(), gc.Equals, state.Dying)

err = undertakerClient.ProcessDyingModel()
c.Assert(err, gc.ErrorMatches, `model not empty, found 1 machine\(s\)`)
c.Assert(err, gc.ErrorMatches, `model not empty, found 1 machine \(model not empty\)`)
}

func (s *undertakerSuite) TestStateRemoveEnvironFails(c *gc.C) {
Expand Down
74 changes: 56 additions & 18 deletions state/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package state

import (
"fmt"
"strings"

"github.com/juju/errors"
jujutxn "github.com/juju/txn"
Expand Down Expand Up @@ -898,7 +899,11 @@ var errModelNotAlive = errors.New("model is no longer alive")
type hasHostedModelsError int

func (e hasHostedModelsError) Error() string {
return fmt.Sprintf("hosting %d other models", e)
s := ""
if e != 1 {
s = "s"
}
return fmt.Sprintf("hosting %d other model"+s, e)
}

func IsHasHostedModelsError(err error) bool {
Expand Down Expand Up @@ -1046,6 +1051,47 @@ func (m *Model) destroyOps(ensureNoHostedModels, ensureEmpty bool) ([]txn.Op, er
return append(prereqOps, ops...), nil
}

type modelNotEmptyError struct {
machines int
applications int
volumes int
filesystems int
}

// Error is part of the error interface.
func (e modelNotEmptyError) Error() string {
msg := "model not empty, found "
plural := func(n int, thing string) string {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice usability consideration :D

s := fmt.Sprintf("%d %s", n, thing)
if n != 1 {
s += "s"
}
return s
}
var contains []string
if n := e.machines; n > 0 {
contains = append(contains, plural(n, "machine"))
}
if n := e.applications; n > 0 {
contains = append(contains, plural(n, "application"))
}
if n := e.volumes; n > 0 {
contains = append(contains, plural(n, "volume"))
}
if n := e.filesystems; n > 0 {
contains = append(contains, plural(n, "filesystem"))
}
return msg + strings.Join(contains, ", ")
}

// IsModelNotEmptyError reports whether or not the given error was caused
// due to an operation requiring a model to be empty, where the model is
// non-empty.
func IsModelNotEmptyError(err error) bool {
_, ok := errors.Cause(err).(modelNotEmptyError)
return ok
}

// checkEmpty checks that the machine is empty of any entities that may
// require external resource cleanup. If the model is not empty, then
// an error will be returned.
Expand All @@ -1060,25 +1106,17 @@ func (m *Model) checkEmpty() error {
}
return errors.Annotatef(err, "getting entity references for model %s", m.UUID())
}
// These errors could be potentially swallowed as we re-try to destroy model.
// Let's, at least, log them for observation.
if n := len(doc.Machines); n > 0 {
logger.Infof("model is still not empty, has machines: %v", doc.Machines)
return errors.Errorf("model not empty, found %d machine(s)", n)
}
if n := len(doc.Applications); n > 0 {
logger.Infof("model is still not empty, has applications: %v", doc.Applications)
return errors.Errorf("model not empty, found %d application(s)", n)
}
if n := len(doc.Volumes); n > 0 {
logger.Infof("model is still not empty, has volumes: %v", doc.Volumes)
return errors.Errorf("model not empty, found %d volume(s)", n)

err := modelNotEmptyError{
machines: len(doc.Machines),
applications: len(doc.Applications),
volumes: len(doc.Volumes),
filesystems: len(doc.Filesystems),
}
if n := len(doc.Filesystems); n > 0 {
logger.Infof("model is still not empty, has file systems: %v", doc.Filesystems)
return errors.Errorf("model not empty, found %d filesystem(s)", n)
if err == (modelNotEmptyError{}) {
return nil
}
return nil
return err
}

func addModelMachineRefOp(st *State, machineId string) txn.Op {
Expand Down
29 changes: 21 additions & 8 deletions state/model_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ func (s *ModelSuite) TestDestroyControllerNonEmptyModelFails(c *gc.C) {

env, err := s.State.Model()
c.Assert(err, jc.ErrorIsNil)
c.Assert(env.Destroy(), gc.ErrorMatches, "failed to destroy model: hosting 1 other models")
c.Assert(env.Destroy(), gc.ErrorMatches, "failed to destroy model: hosting 1 other model")
}

func (s *ModelSuite) TestDestroyControllerEmptyModel(c *gc.C) {
Expand Down Expand Up @@ -465,6 +465,8 @@ func (s *ModelSuite) TestDestroyControllerAndHostedModels(c *gc.C) {

c.Assert(env2.Refresh(), jc.ErrorIsNil)
c.Assert(env2.Life(), gc.Equals, state.Dead)
err = st2.RemoveAllModelDocs()
c.Assert(err, jc.ErrorIsNil)

c.Assert(s.State.ProcessDyingModel(), jc.ErrorIsNil)
c.Assert(env.Refresh(), jc.ErrorIsNil)
Expand Down Expand Up @@ -513,7 +515,8 @@ func (s *ModelSuite) TestDestroyControllerAndHostedModelsWithResources(c *gc.C)
assertEnv(controllerEnv, s.State, state.Dying, 0, 0)

err = s.State.ProcessDyingModel()
c.Assert(err, gc.ErrorMatches, `one or more hosted models are not yet dead`)
c.Assert(err, jc.Satisfies, state.IsHasHostedModelsError)
c.Assert(err, gc.ErrorMatches, `hosting 1 other model`)

assertCleanupCount(c, otherSt, 3)
assertAllMachinesDeadAndRemove(c, otherSt)
Expand All @@ -523,6 +526,12 @@ func (s *ModelSuite) TestDestroyControllerAndHostedModelsWithResources(c *gc.C)
c.Assert(otherEnv.Refresh(), jc.ErrorIsNil)
c.Assert(otherEnv.Life(), gc.Equals, state.Dead)

// Until the model is removed, we can't mark the controller model Dead.
err = s.State.ProcessDyingModel()
c.Assert(err, gc.ErrorMatches, `hosting 1 other model`)

err = otherSt.RemoveAllModelDocs()
c.Assert(err, jc.ErrorIsNil)
c.Assert(s.State.ProcessDyingModel(), jc.ErrorIsNil)
c.Assert(controllerEnv.Refresh(), jc.ErrorIsNil)
c.Assert(controllerEnv.Life(), gc.Equals, state.Dead)
Expand Down Expand Up @@ -566,7 +575,7 @@ func (s *ModelSuite) TestDestroyControllerRemoveEmptyAddNonEmptyModel(c *gc.C) {

env, err := s.State.Model()
c.Assert(err, jc.ErrorIsNil)
c.Assert(env.Destroy(), gc.ErrorMatches, "failed to destroy model: hosting 1 other models")
c.Assert(env.Destroy(), gc.ErrorMatches, "failed to destroy model: hosting 1 other model")
}

func (s *ModelSuite) TestDestroyControllerNonEmptyModelRace(c *gc.C) {
Expand All @@ -580,7 +589,7 @@ func (s *ModelSuite) TestDestroyControllerNonEmptyModelRace(c *gc.C) {

env, err := s.State.Model()
c.Assert(err, jc.ErrorIsNil)
c.Assert(env.Destroy(), gc.ErrorMatches, "failed to destroy model: hosting 1 other models")
c.Assert(env.Destroy(), gc.ErrorMatches, "failed to destroy model: hosting 1 other model")
}

func (s *ModelSuite) TestDestroyControllerAlreadyDyingRaceNoOp(c *gc.C) {
Expand Down Expand Up @@ -735,7 +744,8 @@ func (s *ModelSuite) TestProcessDyingModelWithMachinesAndServicesNoOp(c *gc.C) {
defer state.SetAfterHooks(c, st, func() {
assertEnv(state.Dying, 1, 1)
err := st.ProcessDyingModel()
c.Assert(err, gc.ErrorMatches, `model not empty, found 1 machine\(s\)`)
c.Assert(err, jc.Satisfies, state.IsModelNotEmptyError)
c.Assert(err, gc.ErrorMatches, `model not empty, found 1 machine, 1 application`)
assertEnv(state.Dying, 1, 1)
}).Check()

Expand Down Expand Up @@ -781,7 +791,8 @@ func (s *ModelSuite) TestProcessDyingModelWithVolumeBackedFilesystems(c *gc.C) {
// The filesystem will be gone, but the volume is persistent and should
// not have been removed.
err = st.ProcessDyingModel()
c.Assert(err, gc.ErrorMatches, `model not empty, found 1 volume\(s\)`)
c.Assert(err, jc.Satisfies, state.IsModelNotEmptyError)
c.Assert(err, gc.ErrorMatches, `model not empty, found 1 volume`)
}

func (s *ModelSuite) TestProcessDyingModelWithVolumes(c *gc.C) {
Expand Down Expand Up @@ -818,7 +829,8 @@ func (s *ModelSuite) TestProcessDyingModelWithVolumes(c *gc.C) {
// The volume is persistent and should not have been removed along with
// the machine it was attached to.
err = st.ProcessDyingModel()
c.Assert(err, gc.ErrorMatches, `model not empty, found 1 volume\(s\)`)
c.Assert(err, jc.Satisfies, state.IsModelNotEmptyError)
c.Assert(err, gc.ErrorMatches, `model not empty, found 1 volume`)
}

func (s *ModelSuite) TestProcessDyingControllerEnvironWithHostedEnvsNoOp(c *gc.C) {
Expand All @@ -832,7 +844,8 @@ func (s *ModelSuite) TestProcessDyingControllerEnvironWithHostedEnvsNoOp(c *gc.C
c.Assert(controllerEnv.DestroyIncludingHosted(), jc.ErrorIsNil)

err = s.State.ProcessDyingModel()
c.Assert(err, gc.ErrorMatches, `one or more hosted models are not yet dead`)
c.Assert(err, jc.Satisfies, state.IsHasHostedModelsError)
c.Assert(err, gc.ErrorMatches, `hosting 1 other model`)

c.Assert(controllerEnv.Refresh(), jc.ErrorIsNil)
c.Assert(controllerEnv.Life(), gc.Equals, state.Dying)
Expand Down
18 changes: 12 additions & 6 deletions state/undertaker.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,13 @@ import (

var ErrModelNotDying = errors.New("model is not dying")

// ProcessDyingModel checks if there are any machines or services left in
// state. If there are none, the model's life is changed from dying to dead.
// ProcessDyingModel checks if the model is Dying and empty, and if so,
// transitions the model to Dead.
//
// If the model is non-empty because it is the controller model and still
// contains hosted models, an error satisfying IsHasHostedModelsError will
// be returned. If the model is otherwise non-empty, an error satisfying
// IsNonEmptyModelError will be returned.
func (st *State) ProcessDyingModel() (err error) {
buildTxn := func(attempt int) ([]txn.Op, error) {
model, err := st.Model()
Expand All @@ -25,14 +30,15 @@ func (st *State) ProcessDyingModel() (err error) {
}

if st.IsController() {
// We should not mark the controller model as Dead until
// all hosted models have been removed, otherwise the
// hosted model environs may not have beeen destroyed.
models, err := st.AllModels()
if err != nil {
return nil, errors.Trace(err)
}
for _, model := range models {
if model.UUID() != st.ModelUUID() && model.Life() != Dead {
return nil, errors.Errorf("one or more hosted models are not yet dead")
}
if n := len(models) - 1; n > 0 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AllModels() call above will return even those models that are already Dead. Do we want them to be part of this count?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, per the comment I added: we shouldn't be able to mark the controller model as Dead until all hosted models are removed

return nil, errors.Trace(hasHostedModelsError(n))
}
}

Expand Down
20 changes: 9 additions & 11 deletions worker/undertaker/undertaker.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,30 +154,28 @@ func (u *Undertaker) processDyingModel() error {
if err := u.catacomb.Add(watcher); err != nil {
return errors.Trace(err)
}
defer watcher.Kill() // The watcher is not needed once this func returns.
defer watcher.Kill()

attempt := 1
for {
select {
case <-u.catacomb.Dying():
return u.catacomb.ErrDying()
case <-watcher.Changes():
// TODO(fwereade): this is wrong. If there's a time
// it's ok to ignore an error, it's when we know
// exactly what an error is/means. If there's a
// specific code for "not done yet", *that* is what
// we should be ignoring. But unknown errors are
// *unknown*, and we can't just assume that it's
// safe to continue.
err := u.config.Facade.ProcessDyingModel()
if err == nil {
// ProcessDyingModel succeeded. We're free to
// destroy any remaining environ resources.
return nil
}
// Yes, we ignore the error. See comment above. But let's at least
// surface it in status.
u.setStatus(status.Destroying, fmt.Sprintf("attempt %d to destroy model failed (will retry): %v", attempt, err))
if !params.IsCodeModelNotEmpty(err) && !params.IsCodeHasHostedModels(err) {
return errors.Trace(err)
}
// Retry once there are changes to the model's resources.
u.setStatus(
status.Destroying,
fmt.Sprintf("attempt %d to destroy model failed (will retry): %v", attempt, err),
)
}
attempt++
}
Expand Down
24 changes: 22 additions & 2 deletions worker/undertaker/undertaker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,9 @@ func (s *UndertakerSuite) TestProcessDyingModelErrorRetried(c *gc.C) {
nil, // ModelInfo
nil, // SetStatus
nil, // WatchModelResources,
errors.New("meh, will retry"), // ProcessDyingModel,
&params.Error{Code: params.CodeHasHostedModels},
nil, // SetStatus
errors.New("will retry again"), // ProcessDyingModel,
&params.Error{Code: params.CodeModelNotEmpty},
nil, // SetStatus
nil, // ProcessDyingModel,
nil, // SetStatus
Expand All @@ -146,6 +146,26 @@ func (s *UndertakerSuite) TestProcessDyingModelErrorRetried(c *gc.C) {
)
}

func (s *UndertakerSuite) TestProcessDyingModelErrorFatal(c *gc.C) {
s.fix.errors = []error{
nil, // ModelInfo
nil, // SetStatus
nil, // WatchModelResources,
errors.New("nope"),
}
s.fix.dirty = true
stub := s.fix.run(c, func(w worker.Worker) {
err := workertest.CheckKilled(c, w)
c.Check(err, gc.ErrorMatches, "nope")
})
stub.CheckCallNames(c,
"ModelInfo",
"SetStatus",
"WatchModelResources",
"ProcessDyingModel",
)
}

func (s *UndertakerSuite) TestDestroyErrorFatal(c *gc.C) {
s.fix.errors = []error{nil, nil, errors.New("pow")}
s.fix.info.Result.Life = "dead"
Expand Down