/
execer.go
279 lines (237 loc) · 7.18 KB
/
execer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
package cmd
import (
"context"
"fmt"
"io"
"os/exec"
"sync"
"syscall"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/tilt-dev/tilt/internal/localexec"
"github.com/tilt-dev/tilt/pkg/logger"
"github.com/tilt-dev/tilt/pkg/model"
"github.com/tilt-dev/tilt/pkg/procutil"
)
// DefaultGracePeriod is how long processExecer waits for a process to exit
// after a graceful-shutdown request before SIGKILLing its process group.
// The 30s default matches the timeout Kubernetes uses.
var DefaultGracePeriod = 30 * time.Second
// Execer starts a command and reports its lifecycle over a channel.
type Execer interface {
	// Start launches cmd, writing its combined output to w.
	//
	// Returns a channel to pull status updates from. After the process exits
	// (and transmits its final status), the channel is closed.
	Start(ctx context.Context, cmd model.Cmd, w io.Writer) chan statusAndMetadata
}
// fakeExecProcess tracks the state of a single fake in-flight command.
type fakeExecProcess struct {
	closeCh   chan bool // closed once the process has exited and been deregistered
	exitCh    chan int  // send an exit code here to make the fake process "exit"
	workdir   string    // cmd.Dir captured at Start
	env       []string  // cmd.Env captured at Start
	startTime time.Time // when Start registered this process
}
// FakeExecer is a test double for Execer that lets tests drive process
// lifecycles by hand (see stop).
type FakeExecer struct {
	// really dumb/simple process management - key by the command string, and make duplicates an error
	processes map[string]*fakeExecProcess
	mu        sync.Mutex // guards processes
}
// NewFakeExecer returns a FakeExecer with an empty process table.
func NewFakeExecer() *FakeExecer {
	return &FakeExecer{
		processes: map[string]*fakeExecProcess{},
	}
}
// Start registers a fake process for cmd and returns a channel of status
// updates, mimicking Execer.Start without running anything.
//
// Only one live instance per unique command string is supported: if a prior
// instance of the same command is still registered, Start waits up to 5s for
// it to finish; on timeout it logs an internal error and returns nil.
func (e *FakeExecer) Start(ctx context.Context, cmd model.Cmd, w io.Writer) chan statusAndMetadata {
	e.mu.Lock()
	oldProcess, ok := e.processes[cmd.String()]
	e.mu.Unlock()
	if ok {
		// Wait (bounded) for the previous instance of this command to be
		// torn down before registering a new one.
		select {
		case <-oldProcess.closeCh:
		case <-time.After(5 * time.Second):
			logger.Get(ctx).Infof("internal error: fake execer only supports one instance of each unique command at a time. tried to start a second instance of %q", cmd.Argv)
			return nil
		}
	}
	exitCh := make(chan int)
	closeCh := make(chan bool)
	e.mu.Lock()
	e.processes[cmd.String()] = &fakeExecProcess{
		closeCh:   closeCh,
		exitCh:    exitCh,
		workdir:   cmd.Dir,
		startTime: time.Now(),
		env:       cmd.Env,
	}
	e.mu.Unlock()
	statusCh := make(chan statusAndMetadata)
	go func() {
		// fakeRun blocks until the fake process "exits" (via stop or ctx
		// cancellation); afterwards deregister it and signal closeCh.
		fakeRun(ctx, cmd, w, statusCh, exitCh)
		e.mu.Lock()
		close(closeCh)
		delete(e.processes, cmd.String())
		e.mu.Unlock()
	}()
	return statusCh
}
// stop fakes an exit of the process with the given command string, making it
// report the specified exit code. Returns an error if no such process is
// currently registered.
//
// NOTE(review): the send on exitCh blocks until fakeRun receives it; if the
// process already exited via ctx cancellation this send could hang — appears
// to rely on test callers only stopping live processes. Confirm before
// relying on it elsewhere.
func (e *FakeExecer) stop(cmd string, exitCode int) error {
	e.mu.Lock()
	p, ok := e.processes[cmd]
	e.mu.Unlock()
	if !ok {
		return fmt.Errorf("no such process %q", cmd)
	}
	p.exitCh <- exitCode
	e.mu.Lock()
	delete(e.processes, cmd)
	e.mu.Unlock()
	return nil
}
// fakeRun emulates the lifecycle of a local process: it announces Running,
// then blocks until either the context is canceled or an exit code arrives
// on exitCh, reports the terminal status, and closes statusCh.
func fakeRun(ctx context.Context, cmd model.Cmd, w io.Writer, statusCh chan statusAndMetadata, exitCh chan int) {
	defer close(statusCh)

	_, _ = fmt.Fprintf(w, "Starting cmd %v\n", cmd)
	statusCh <- statusAndMetadata{status: Running}

	var final statusAndMetadata
	select {
	case <-ctx.Done():
		_, _ = fmt.Fprintf(w, "cmd %v canceled\n", cmd)
		// this was cleaned up by the controller, so it's not an error
		final = statusAndMetadata{status: Done, exitCode: 0}
	case code := <-exitCh:
		_, _ = fmt.Fprintf(w, "cmd %v exited with code %d\n", cmd, code)
		// even an exit code of 0 is an error, because services aren't supposed to exit!
		final = statusAndMetadata{status: Error, exitCode: code}
	}
	statusCh <- final
}
// RequireNoKnownProcess asserts that this FakeExecer is not tracking any
// process for the given command string.
func (fe *FakeExecer) RequireNoKnownProcess(t *testing.T, cmd string) {
	t.Helper()
	fe.mu.Lock()
	defer fe.mu.Unlock()
	_, ok := fe.processes[cmd]
	// Pass the receiver pointer for %T rather than a FakeExecer{} literal:
	// FakeExecer contains a sync.Mutex, and passing it by value trips
	// `go vet`'s copylocks check.
	require.False(t, ok, "%T should not be tracking any process with cmd %q, but it is", fe, cmd)
}
// ProvideExecer is the dependency-injection provider for the production
// Execer implementation.
func ProvideExecer(localEnv *localexec.Env) Execer {
	execer := NewProcessExecer(localEnv)
	return execer
}
// processExecer runs commands as real local OS processes.
type processExecer struct {
	gracePeriod time.Duration  // how long killProcess waits after a graceful-shutdown request before SIGKILL
	localEnv    *localexec.Env // environment used to construct exec.Cmds
}
// NewProcessExecer creates a processExecer with the default grace period.
func NewProcessExecer(localEnv *localexec.Env) *processExecer {
	e := &processExecer{localEnv: localEnv}
	e.gracePeriod = DefaultGracePeriod
	return e
}
// Start implements Execer: it launches cmd in a background goroutine and
// returns a channel of status updates that is closed when the process ends.
func (e *processExecer) Start(ctx context.Context, cmd model.Cmd, w io.Writer) chan statusAndMetadata {
	statusCh := make(chan statusAndMetadata)
	go e.processRun(ctx, cmd, w, statusCh)
	return statusCh
}
// processRun execs cmd as a real OS process, streams its combined output to
// w, and sends status transitions on statusCh (Running, then a terminal
// Done/Error) until the process exits or ctx is canceled. It closes statusCh
// before returning.
func (e *processExecer) processRun(ctx context.Context, cmd model.Cmd, w io.Writer, statusCh chan statusAndMetadata) {
	defer close(statusCh)
	logger.Get(ctx).Infof("Running cmd: %s", cmd.String())
	c, err := e.localEnv.ExecCmd(cmd, logger.Get(ctx))
	if err != nil {
		// The command could not even be constructed.
		logger.Get(ctx).Errorf("%q invalid cmd: %v", cmd.String(), err)
		statusCh <- statusAndMetadata{
			status:   Error,
			exitCode: 1,
			reason:   fmt.Sprintf("invalid cmd: %v", err),
		}
		return
	}
	// Put the child in its own process group so shutdown can signal the
	// whole group (the process plus any descendants).
	c.SysProcAttr = &syscall.SysProcAttr{}
	procutil.SetOptNewProcessGroup(c.SysProcAttr)
	c.Stderr = w
	c.Stdout = w
	err = c.Start()
	if err != nil {
		logger.Get(ctx).Errorf("%s failed to start: %v", cmd.String(), err)
		statusCh <- statusAndMetadata{
			status:   Error,
			exitCode: 1,
			reason:   fmt.Sprintf("failed to start: %v", err),
		}
		return
	}
	pid := c.Process.Pid
	statusCh <- statusAndMetadata{status: Running, pid: pid}
	// This is to prevent this goroutine from blocking, since we know there's only going to be one result
	processExitCh := make(chan error, 1)
	go func() {
		// Cmd Wait() does not have quite the semantics we want,
		// because it will block indefinitely on any descendant processes.
		// This can lead to Cmd appearing to hang.
		//
		// Instead, we exit immediately if the main process exits.
		//
		// Details:
		// https://github.com/tilt-dev/tilt/issues/4456
		state, err := c.Process.Wait()
		// The main process is gone; take any descendants down with it.
		procutil.KillProcessGroup(c)
		if err != nil {
			processExitCh <- err
		} else if !state.Success() {
			// Wrap the non-zero exit in an ExitError so the receiving
			// branch below can extract the real exit code.
			processExitCh <- &exec.ExitError{ProcessState: state}
		} else {
			processExitCh <- nil
		}
		close(processExitCh)
	}()
	select {
	case err := <-processExitCh:
		// The process exited on its own; translate the result into a
		// terminal status.
		exitCode := 0
		reason := ""
		status := Done
		if err == nil {
			// Use defaults
		} else if ee, ok := err.(*exec.ExitError); ok {
			// Non-zero exit: report the process's actual exit code.
			status = Error
			exitCode = ee.ExitCode()
			reason = err.Error()
			logger.Get(ctx).Debugf("%s exited with exit code %d", cmd.String(), ee.ExitCode())
		} else {
			// Wait itself failed; no exit code is available, so report 1.
			status = Error
			exitCode = 1
			reason = err.Error()
			logger.Get(ctx).Errorf("error execing %s: %v", cmd.String(), err)
		}
		statusCh <- statusAndMetadata{status: status, pid: pid, exitCode: exitCode, reason: reason}
	case <-ctx.Done():
		// We were asked to stop: shut the process down (gracefully if
		// possible) and report the conventional SIGKILL exit code (137).
		e.killProcess(ctx, c, processExitCh)
		statusCh <- statusAndMetadata{status: Done, pid: pid, reason: "killed", exitCode: 137}
	}
}
// killProcess tries to shut c down gracefully, escalating to a SIGKILL of
// the whole process group if the process has not exited after e.gracePeriod.
// processExitCh fires once the process has actually exited; progress is
// logged while waiting.
func (e *processExecer) killProcess(ctx context.Context, c *exec.Cmd, processExitCh chan error) {
	logger.Get(ctx).Debugf("About to gracefully shut down process %d", c.Process.Pid)
	err := procutil.GracefullyShutdownProcess(c.Process)
	if err != nil {
		// We couldn't even deliver the graceful-shutdown signal, so skip
		// straight to killing the process group.
		logger.Get(ctx).Debugf("Unable to gracefully kill process %d, sending SIGKILL to the process group: %v", c.Process.Pid, err)
		procutil.KillProcessGroup(c)
		return
	}
	// we wait e.gracePeriod (30 seconds by default) to give the process
	// enough time to finish doing any cleanup.
	// the default is the same timeout that Kubernetes uses
	// TODO(dmiller): make this configurable via the Tiltfile
	infoCh := time.After(e.gracePeriod / 20)
	moreInfoCh := time.After(e.gracePeriod / 3)
	finalCh := time.After(e.gracePeriod)
	// Wait in three stages so increasingly urgent progress messages get
	// logged; bail out early as soon as the process exits.
	select {
	case <-infoCh:
		logger.Get(ctx).Infof("Waiting %s for process to exit... (pid: %d)", e.gracePeriod, c.Process.Pid)
	case <-processExitCh:
		return
	}
	select {
	case <-moreInfoCh:
		logger.Get(ctx).Infof("Still waiting on exit... (pid: %d)", c.Process.Pid)
	case <-processExitCh:
		return
	}
	select {
	case <-finalCh:
		logger.Get(ctx).Infof("Time is up! Sending %d a kill signal", c.Process.Pid)
		procutil.KillProcessGroup(c)
	case <-processExitCh:
		return
	}
}