-
Notifications
You must be signed in to change notification settings - Fork 61
/
watcher.go
160 lines (136 loc) · 3.69 KB
/
watcher.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
//
// Copyright 2020 New Relic Corporation. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
package main
import (
"errors"
"fmt"
"os"
"os/exec"
"os/signal"
"runtime"
"syscall"
"newrelic/log"
)
// workerState records how a worker process terminated: either the wait
// status reported for the process, or an error describing why no usable
// status is available.
type workerState struct {
// status is the wait status of the terminated worker. The methods of
// workerState only consult it when err is nil.
status syscall.WaitStatus
// err, when non-nil, takes precedence over status.
err error
}
// runWatcher spawns and supervises worker processes. When a worker exits
// unexpectedly, it is respawned. Only a single worker process should
// exist at any given time.
//
// The supervision loop ends when one of the following happens:
//   - a worker cannot be spawned: the error is logged, the exit status is
//     set to 1 and the watcher returns;
//   - the worker terminates in a way ShouldRespawn reports as final (exit
//     status 0 or 1, or termination by SIGTERM): the watcher returns
//     without touching the exit status;
//   - the watcher receives SIGTERM: the signal is forwarded to the worker
//     and the watcher returns without waiting for the worker to exit.
//
// Any other worker termination (exit status >= 2, a different signal, or
// a failure to wait for the worker) causes a new worker to be spawned.
func runWatcher(cfg *Config) {
	signalChan := make(chan os.Signal, 1)
	signal.Notify(signalChan, syscall.SIGTERM)

	for {
		worker, err := spawnWorker()
		if err != nil {
			// Some older RHEL 5.x linux/CentOS 5.x Xen kernels incorrectly handle
			// missing system calls (here: pipe2), which manifests as an EBADF
			// error when spawning a child process.
			if runtime.GOOS == "linux" {
				// errors.As/errors.Is also match when the PathError or the
				// errno has been wrapped by an intermediate layer.
				var perr *os.PathError
				if errors.As(err, &perr) && errors.Is(perr.Err, syscall.EBADF) {
					err = borkedSyscallError("pipe2")
				}
			}
			log.Errorf("unable to create worker: %v", err)
			setExitStatus(1)
			return
		}

		select {
		case status := <-supervise(worker):
			if status == nil || !status.ShouldRespawn() {
				log.Infof("%v - NOT restarting", status)
				return
			}
			log.Errorf("%v - restarting", status)
		case caught := <-signalChan:
			log.Infof("watcher received signal %d - exiting", caught)
			// Forwarding is best effort (the worker may already be gone),
			// but a failure is reported rather than silently dropped.
			if err := worker.Process.Signal(caught); err != nil {
				log.Errorf("unable to forward signal %d to worker: %v", caught, err)
			}
			return
		}
	}
}
// spawnWorker starts a new worker process.
//
// The worker is the current executable (os.Args[0]) invoked with the
// current arguments plus "-no-pidfile". It shares this process's stdout
// and stderr, and its environment is os.Environ() after
// Set(RoleEnvironmentVariable, "worker") has been applied to it.
//
// The command is returned together with the result of starting it.
func spawnWorker() (*exec.Cmd, error) {
	workerEnv := Environment(os.Environ())
	workerEnv.Set(RoleEnvironmentVariable, "worker")

	child := exec.Command(os.Args[0], os.Args[1:]...)

	// Only one process can own the pid file, and if this function
	// is being called that should be the current process.
	child.Args = append(child.Args, "-no-pidfile")

	child.Env = []string(workerEnv)
	child.Stdout, child.Stderr = os.Stdout, os.Stderr

	if err := child.Start(); err != nil {
		return child, err
	}
	return child, nil
}
// supervise monitors a single worker process.
//
// It waits for worker in a background goroutine and returns a channel on
// which exactly one *workerState is delivered before the channel is
// closed:
//   - if waiting succeeds, or fails with an *exec.ExitError, the state is
//     built from the worker's ProcessState;
//   - if waiting fails with any other error, the state carries that error.
//
// supervise panics if worker is nil.
func supervise(worker *exec.Cmd) chan *workerState {
	if worker == nil {
		panic("worker is nil")
	}

	// One slot of buffering lets the goroutine hand over its single result
	// and finish even when the caller has stopped receiving (for example
	// after it handled a signal instead), so the goroutine is not leaked.
	statusChan := make(chan *workerState, 1)

	go func() {
		defer close(statusChan)

		if err := worker.Wait(); err != nil {
			if _, ok := err.(*exec.ExitError); !ok {
				statusChan <- &workerState{err: err}
				return
			}
		}
		statusChan <- newWorkerState(worker.ProcessState)
	}()

	return statusChan
}
// ShouldRespawn determines whether a worker should be respawned based on
// how it terminated.
//
// It reports false only when the worker exited with status 0 or 1, or
// was terminated by SIGTERM. A recorded error, any other exit status,
// any other signal, or an unrecognised wait status all report true.
func (s *workerState) ShouldRespawn() bool {
	if s.err != nil {
		return true
	}

	ws := s.status
	if ws.Exited() {
		return ws.ExitStatus() > 1
	}
	if ws.Signaled() {
		return ws.Signal() != syscall.SIGTERM
	}
	return true
}
// String describes how the worker terminated: the recorded error's
// message if there is one, otherwise the exit status or the terminating
// signal (noting a core dump), otherwise a generic fallback message.
func (s *workerState) String() string {
	if s.err != nil {
		return s.err.Error()
	}

	ws := s.status
	if ws.Exited() {
		return fmt.Sprintf("worker exited with status %d", ws.ExitStatus())
	}
	if !ws.Signaled() {
		return "worker exited with unknown status"
	}

	sig := ws.Signal()
	if ws.CoreDump() {
		return fmt.Sprintf("worker received signal %s (core dumped)", sig)
	}
	return fmt.Sprintf("worker received signal %s", sig)
}
// newWorkerState converts the process state of a terminated worker into
// a workerState.
//
// When the system-dependent exit information is a syscall.WaitStatus it
// is stored as the status. Otherwise a successful process yields an empty
// state, and an unsuccessful one yields a state whose error is the
// textual form of ps.
func newWorkerState(ps *os.ProcessState) *workerState {
	switch sys := ps.Sys().(type) {
	case syscall.WaitStatus:
		return &workerState{status: sys}
	}

	// No wait status available: fall back to the portable success flag.
	if !ps.Success() {
		return &workerState{err: errors.New(ps.String())}
	}
	return &workerState{}
}