/
watchdog.go
83 lines (70 loc) · 2.26 KB
/
watchdog.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
package watchdog
import (
"bytes"
"runtime/pprof"
"time"
"github.com/subrahamanyam341/andes-core-go/core"
"github.com/subrahamanyam341/andes-core-go/core/check"
"github.com/subrahamanyam341/andes-core-go/data/endProcess"
)
// watchdog manages named alarms by delegating to a core.TimersScheduler.
// For alarms armed through SetDefault it additionally logs a goroutine dump
// and pushes a stop request on chanStopNodeProcess when the alarm expires.
type watchdog struct {
// alarmScheduler is the scheduler that Set, SetDefault, Stop and Reset
// forward to (via its Add, Cancel and Reset methods).
alarmScheduler core.TimersScheduler
// chanStopNodeProcess receives an endProcess.ArgEndProcess from the default
// expiry handler.
chanStopNodeProcess chan endProcess.ArgEndProcess
// log is used by the default expiry handler to report the expired alarm and
// the goroutine dump.
log core.Logger
}
// NewWatchdog builds a watchdog around the given alarm scheduler, stop channel
// and logger and returns it as a core.WatchdogTimer.
//
// The arguments are validated in declaration order; the first nil one aborts
// construction with ErrNilAlarmScheduler, ErrNilEndProcessChan or
// core.ErrNilLogger respectively, and a nil WatchdogTimer is returned.
func NewWatchdog(
	alarmScheduler core.TimersScheduler,
	chanStopNodeProcess chan endProcess.ArgEndProcess,
	log core.Logger,
) (core.WatchdogTimer, error) {
	// Cases are evaluated top to bottom, so a later check only runs when all
	// earlier arguments were valid.
	switch {
	case check.IfNil(alarmScheduler):
		return nil, ErrNilAlarmScheduler
	case chanStopNodeProcess == nil:
		return nil, ErrNilEndProcessChan
	case check.IfNil(log):
		return nil, core.ErrNilLogger
	}

	wd := &watchdog{
		alarmScheduler:      alarmScheduler,
		chanStopNodeProcess: chanStopNodeProcess,
		log:                 log,
	}
	return wd, nil
}
// Set registers an alarm by forwarding callback, duration and alarmID,
// unchanged, to the underlying scheduler's Add method.
// When and how callback is invoked is determined by the scheduler
// implementation, not by this method.
func (w *watchdog) Set(callback func(alarmID string), duration time.Duration, alarmID string) {
w.alarmScheduler.Add(callback, duration, alarmID)
}
// SetDefault arms an alarm identified by watchdogID with the given duration,
// using the watchdog's built-in expiry handler as the callback.
// That handler logs the goroutine stack traces and then sends a stop request
// on the end-process channel.
func (w *watchdog) SetDefault(duration time.Duration, watchdogID string) {
	onExpiry := w.defaultWatchdogExpiry
	w.alarmScheduler.Add(onExpiry, duration, watchdogID)
}
// defaultWatchdogExpiry is the callback registered by SetDefault.
// It captures the goroutine profile, logs the expiry of watchdogID followed by
// the captured dump, and finally sends an endProcess.ArgEndProcess describing
// the expired alarm on chanStopNodeProcess. The send is a plain channel send,
// so it blocks until the channel accepts the value.
func (w *watchdog) defaultWatchdogExpiry(watchdogID string) {
	var stacks bytes.Buffer
	// Debug level 1 produces the human-readable text form of the profile.
	if err := pprof.Lookup("goroutine").WriteTo(&stacks, 1); err != nil {
		// A failed dump is reported but does not stop the shutdown request;
		// whatever was written to the buffer is still logged below.
		w.log.Error("could not dump goroutines", "error", err)
	}

	w.log.Error("watchdog alarm has expired", "alarm", watchdogID)
	w.log.Warn(stacks.String())

	w.chanStopNodeProcess <- endProcess.ArgEndProcess{
		Reason:      "alarm " + watchdogID + " has expired",
		Description: "the " + watchdogID + " is stuck",
	}
}
// Stop cancels the alarm registered under alarmID by calling Cancel on the
// underlying scheduler.
func (w *watchdog) Stop(alarmID string) {
w.alarmScheduler.Cancel(alarmID)
}
// Reset resets the alarm registered under alarmID by calling Reset on the
// underlying scheduler.
func (w *watchdog) Reset(alarmID string) {
w.alarmScheduler.Reset(alarmID)
}
// IsInterfaceNil reports whether the receiver pointer itself is nil, i.e.
// whether an interface value wrapping this *watchdog has no underlying object.
// It is safe to call on a nil receiver because it only compares the pointer.
func (w *watchdog) IsInterfaceNil() bool {
return w == nil
}