Skip to content

Commit

Permalink
Merge 288de1c into 3aa7cba
Browse files Browse the repository at this point in the history
  • Loading branch information
Songmu committed Feb 12, 2017
2 parents 3aa7cba + 288de1c commit 3d3cdd1
Show file tree
Hide file tree
Showing 8 changed files with 368 additions and 1 deletion.
2 changes: 1 addition & 1 deletion .goxc.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@
"TasksExclude": [
"go-test"
],
"MainDirsExclude": "wix,checks/testdata",
"MainDirsExclude": "wix,testdata,checks/testdata",
"ResourcesInclude": "README*,mackerel-agent.conf"
}
28 changes: 28 additions & 0 deletions commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,34 @@ func doInit(fs *flag.FlagSet, argv []string) error {
return err
}

/* +command supervise - supervise mode
supervise -conf mackerel-agent.conf ...
run as supervise mode enabling configuration reloading and crash recovery
*/
func doSupervise(fs *flag.FlagSet, argv []string) error {
if runtime.GOOS == "windows" {
return fmt.Errorf("supervise mode is not supported on windows")
}
copiedArgv := make([]string, len(argv))
copy(copiedArgv, argv)
conf, err := resolveConfig(fs, argv)
if err != nil {
return err
}
err = createPidFile(conf.Pidfile)
if err != nil {
return err
}
defer removePidFile(conf.Pidfile)

return (&supervisor{
prog: os.Args[0],
argv: copiedArgv,
}).supervise()
}

/* +command version - display version of mackerel-agent
version
Expand Down
1 change: 1 addition & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ type Config struct {
Roles []string
Verbose bool
Silent bool
Child bool
Diagnostic bool `toml:"diagnostic"`
Connection ConnectionConfig
DisplayName string `toml:"display_name"`
Expand Down
12 changes: 12 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ func resolveConfig(fs *flag.FlagSet, argv []string) (*config.Config, error) {
root = fs.String("root", config.DefaultConfig.Root, "Directory containing variable state information")
apikey = fs.String("apikey", "", "(DEPRECATED) API key from mackerel.io web site")
diagnostic = fs.Bool("diagnostic", false, "Enables diagnostic features")
child = fs.Bool("child", false, "(internal use) child process of the supervise mode")
verbose bool
roleFullnames roleFullnamesFlag
)
Expand Down Expand Up @@ -127,8 +128,13 @@ func resolveConfig(fs *flag.FlagSet, argv []string) (*config.Config, error) {
conf.Verbose = verbose
case "role":
conf.Roles = roleFullnames
case "child":
conf.Child = *child
}
})
if conf.Child {
conf.Pidfile = ""
}

r := []string{}
for _, roleFullName := range conf.Roles {
Expand All @@ -155,6 +161,9 @@ func resolveConfig(fs *flag.FlagSet, argv []string) (*config.Config, error) {
}

func createPidFile(pidfile string) error {
if pidfile == "" {
return nil
}
if pidString, err := ioutil.ReadFile(pidfile); err == nil {
if pid, err := strconv.Atoi(string(pidString)); err == nil {
if existsPid(pid) {
Expand Down Expand Up @@ -182,6 +191,9 @@ func createPidFile(pidfile string) error {
}

func removePidFile(pidfile string) {
if pidfile == "" {
return
}
if err := os.Remove(pidfile); err != nil {
logger.Errorf("Failed to remove the pidfile: %s: %s", pidfile, err)
}
Expand Down
100 changes: 100 additions & 0 deletions supervisor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
package main

import (
"bytes"
"fmt"
"os"
"os/exec"
"os/signal"
"syscall"
"time"
)

type supervisor struct {
prog string
argv []string

cmd *exec.Cmd
startAt time.Time
signaled bool
hupped bool
}

var spawnInterval = 30 * time.Second

func (sv *supervisor) launched() bool {
return sv.cmd.Process != nil && time.Now().After(sv.startAt.Add(spawnInterval))
}

func (sv *supervisor) buildCmd() *exec.Cmd {
argv := append(sv.argv, "-child")
cmd := exec.Command(sv.prog, argv...)
cmd.Stderr = os.Stderr
cmd.Stdout = os.Stdout
return cmd
}

func (sv *supervisor) start() error {
sv.hupped = false
sv.cmd = sv.buildCmd()
sv.startAt = time.Now()
return sv.cmd.Start()
}

func (sv *supervisor) stop(sig os.Signal) error {
sv.signaled = true
return sv.cmd.Process.Signal(sig)
}

func (sv *supervisor) configtest() error {
argv := append([]string{"configtest"}, sv.argv...)
cmd := exec.Command(sv.prog, argv...)
buf := &bytes.Buffer{}
cmd.Stderr = buf
err := cmd.Run()
if err != nil {
return fmt.Errorf("configtest failed: %s", buf.String())
}
return nil
}

func (sv *supervisor) reload() error {
err := sv.configtest()
if err != nil {
return err
}
sv.hupped = true
return sv.cmd.Process.Signal(syscall.SIGTERM)
}

func (sv *supervisor) wait() (err error) {
for {
err = sv.cmd.Wait()
if sv.signaled || (!sv.hupped && !sv.launched()) {
break
}
sv.start()
}
return
}

func (sv *supervisor) handleSignal(ch <-chan os.Signal) {
for sig := range ch {
if sig == syscall.SIGHUP {
err := sv.reload()
if err != nil {
logger.Warningf("failed to reload: %s", err.Error())
}
} else {
sv.stop(sig)
}
}
}

func (sv *supervisor) supervise() error {
sv.start()
c := make(chan os.Signal)
signal.Notify(c, os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT, syscall.SIGHUP)
go sv.handleSignal(c)
return sv.wait()
}
180 changes: 180 additions & 0 deletions supervisor_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
// +build !windows

package main

import (
"os"
"os/exec"
"syscall"
"testing"
"time"
)

const stubAgent = "testdata/stub-agent"

func init() {
err := exec.Command("go", "build", "-o", stubAgent, "testdata/stub-agent.go").Run()
if err != nil {
panic(err)
}
}

func TestSupervisor(t *testing.T) {
sv := &supervisor{
prog: stubAgent,
argv: []string{"dummy"},
}
sv.start()
ch := make(chan os.Signal)
go sv.handleSignal(ch)
done := make(chan error)
go func() {
done <- sv.wait()
}()
pid := sv.cmd.Process.Pid
if !existsPid(pid) {
t.Errorf("process doesn't exist")
}
ch <- os.Interrupt

err := <-done
if err != nil {
t.Errorf("error should be nil but: %s", err)
}
if existsPid(pid) {
t.Errorf("child process isn't terminated")
}
}

func TestSupervisor_reload(t *testing.T) {
sv := &supervisor{
prog: stubAgent,
argv: []string{"dummy"},
}
sv.start()
ch := make(chan os.Signal)
go sv.handleSignal(ch)
done := make(chan error)
go func() {
done <- sv.wait()
}()
oldPid := sv.cmd.Process.Pid
if !existsPid(oldPid) {
t.Errorf("process doesn't exist")
}
ch <- syscall.SIGHUP
time.Sleep(200 * time.Millisecond)
newPid := sv.cmd.Process.Pid
if oldPid == newPid {
t.Errorf("reload failed")
}
if existsPid(oldPid) {
t.Errorf("old process isn't terminated")
}
if !existsPid(newPid) {
t.Errorf("new process doesn't exist")
}
ch <- syscall.SIGTERM
err := <-done
if err != nil {
t.Errorf("something went wrong")
}
if newPid != sv.cmd.Process.Pid {
t.Errorf("something went wrong")
}
if existsPid(newPid) {
t.Errorf("child process isn't terminated")
}
}

func TestSupervisor_reloadFail(t *testing.T) {
sv := &supervisor{
prog: stubAgent,
argv: []string{"failed"},
}
sv.start()
ch := make(chan os.Signal)
go sv.handleSignal(ch)
done := make(chan error)
go func() {
done <- sv.wait()
}()
oldPid := sv.cmd.Process.Pid
if !existsPid(oldPid) {
t.Errorf("process doesn't exist")
}
ch <- syscall.SIGHUP
time.Sleep(time.Second)
newPid := sv.cmd.Process.Pid
if oldPid != newPid {
t.Errorf("reload should be failed, but unintentionally reloaded")
}

ch <- syscall.SIGTERM
<-done
}

func TestSupervisor_launchFailed(t *testing.T) {
sv := &supervisor{
prog: stubAgent,
argv: []string{"launch failure"},
}
sv.start()
ch := make(chan os.Signal)
go sv.handleSignal(ch)
done := make(chan error)
go func() {
done <- sv.wait()
}()
pid := sv.cmd.Process.Pid
if !existsPid(pid) {
t.Errorf("process doesn't exist")
}
err := <-done
if err == nil {
t.Errorf("something went wrong")
}
if existsPid(sv.cmd.Process.Pid) {
t.Errorf("child process isn't terminated")
}
}

func TestSupervisor_crashRecovery(t *testing.T) {
origSpawnInterval := spawnInterval
spawnInterval = 300 * time.Millisecond
defer func() { spawnInterval = origSpawnInterval }()

sv := &supervisor{
prog: stubAgent,
argv: []string{"blah blah blah"},
}
sv.start()
ch := make(chan os.Signal)
go sv.handleSignal(ch)
done := make(chan error)
go func() {
done <- sv.wait()
}()
oldPid := sv.cmd.Process.Pid
if !existsPid(oldPid) {
t.Errorf("process doesn't exist")
}
time.Sleep(spawnInterval)

// let it crash
sv.cmd.Process.Signal(syscall.SIGUSR1)

time.Sleep(spawnInterval)
newPid := sv.cmd.Process.Pid
if oldPid == newPid {
t.Errorf("crash recovery failed")
}
if existsPid(oldPid) {
t.Errorf("old process isn't terminated")
}
if !existsPid(newPid) {
t.Errorf("new process doesn't exist")
}
ch <- syscall.SIGTERM
<-done
}
1 change: 1 addition & 0 deletions testdata/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
stub-agent
Loading

0 comments on commit 3d3cdd1

Please sign in to comment.