Skip to content

Commit

Permalink
Merge pull request #5746 from anastasiamac/jobs-change-pickup
Browse files Browse the repository at this point in the history
Fixes lp#1597830: worker should not restart agent.

Xenial machines with units would hang when trying to convert to state servers under HA after new revisions of systemd and dbus were introduced.

It was discovered that the conv2state worker would explicitly restart an agent. This proposal changes the behavior to throw an error instead to ensure that proper infrastructure restarts the agent cleanly.
  • Loading branch information
jujubot committed Jul 1, 2016
2 parents fdeb501 + 498ba48 commit 113572c
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 26 deletions.
12 changes: 6 additions & 6 deletions worker/conv2state/converter.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
apimachiner "github.com/juju/juju/api/machiner"
"github.com/juju/juju/api/watcher"
"github.com/juju/juju/apiserver/params"
"github.com/juju/juju/cmd/jujud/util"
"github.com/juju/juju/state/multiwatcher"
"github.com/juju/juju/worker"
)
Expand All @@ -31,9 +32,9 @@ type converter struct {
machine machine
}

// Agent is an interface that can have its password set and be told to restart.
// Agent is an interface that exposes machine agent methods required for the
// conversion worker.
type Agent interface {
Restart() error
Tag() names.Tag
}

Expand Down Expand Up @@ -76,8 +77,8 @@ func (c *converter) SetUp() (watcher.NotifyWatcher, error) {
}

// Handle implements NotifyWatchHandler's Handle method. If the change means
// that the machine is now expected to manage the environment, we change its
// password (to set its password in mongo) and restart the agent.
// that the machine is now expected to manage the environment,
// we throw a fatal error to instigate agent restart.
func (c *converter) Handle(_ <-chan struct{}) error {
results, err := c.machine.Jobs()
if err != nil {
Expand All @@ -86,8 +87,7 @@ func (c *converter) Handle(_ <-chan struct{}) error {
if !multiwatcher.AnyJobNeedsState(results.Jobs...) {
return nil
}

return errors.Trace(c.agent.Restart())
return &util.FatalError{"bounce agent to pick up new jobs"}
}

// TearDown implements NotifyWatchHandler's TearDown method.
Expand Down
17 changes: 5 additions & 12 deletions worker/conv2state/converter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (

"github.com/juju/errors"
"github.com/juju/names"
jc "github.com/juju/testing/checkers"
gc "gopkg.in/check.v1"

"github.com/juju/juju/apiserver/params"
Expand Down Expand Up @@ -71,8 +70,9 @@ func (s Suite) TestHandle(c *gc.C) {
_, err := conv.SetUp()
c.Assert(err, gc.IsNil)
err = conv.Handle(nil)
c.Assert(err, gc.IsNil)
c.Assert(a.didRestart, jc.IsTrue)
// Since machine has multiwatcher.JobManageEnviron, we expect an error
// which will get agent to restart.
c.Assert(err.Error(), gc.Equals, "bounce agent to pick up new jobs")
}

func (s Suite) TestHandleNoManageEnviron(c *gc.C) {
Expand All @@ -87,7 +87,6 @@ func (s Suite) TestHandleNoManageEnviron(c *gc.C) {
c.Assert(err, gc.IsNil)
err = conv.Handle(nil)
c.Assert(err, gc.IsNil)
c.Assert(a.didRestart, jc.IsFalse)
}

func (Suite) TestHandleJobsError(c *gc.C) {
Expand All @@ -103,13 +102,11 @@ func (Suite) TestHandleJobsError(c *gc.C) {
c.Assert(err, gc.IsNil)
err = conv.Handle(nil)
c.Assert(errors.Cause(err), gc.Equals, m.jobsErr)
c.Assert(a.didRestart, jc.IsFalse)
}

func (s Suite) TestHandleRestartError(c *gc.C) {
a := &fakeAgent{
tag: names.NewMachineTag("1"),
restartErr: errors.New("foo"),
tag: names.NewMachineTag("1"),
}
jobs := []multiwatcher.MachineJob{multiwatcher.JobHostUnits, multiwatcher.JobManageEnviron}
m := &fakeMachine{
Expand All @@ -120,9 +117,5 @@ func (s Suite) TestHandleRestartError(c *gc.C) {
_, err := conv.SetUp()
c.Assert(err, gc.IsNil)
err = conv.Handle(nil)
c.Assert(errors.Cause(err), gc.Equals, a.restartErr)

// We set this to true whenver the function is called, even though we're
// returning an error from it.
c.Assert(a.didRestart, jc.IsTrue)
c.Assert(err.Error(), gc.Equals, "bounce agent to pick up new jobs")
}
9 changes: 1 addition & 8 deletions worker/conv2state/fakes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,7 @@ func (fakeWatcher) Err() error {
}

type fakeAgent struct {
tag names.Tag
restartErr error
didRestart bool
}

func (f *fakeAgent) Restart() error {
f.didRestart = true
return f.restartErr
tag names.Tag
}

func (f fakeAgent) Tag() names.Tag {
Expand Down

0 comments on commit 113572c

Please sign in to comment.