-
Notifications
You must be signed in to change notification settings - Fork 93
/
metrics.go
153 lines (129 loc) · 3.53 KB
/
metrics.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
package lotus
import (
"context"
"fmt"
"sync"
"time"
"github.com/filecoin-project/lotus/api"
"go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/metric/global"
)
// metricHeightInterval is the pause between two consecutive runs of the
// background evaluation loop started by NewSyncMonitor.
const metricHeightInterval = 2 * time.Minute
// SyncMonitor tracks how far the Lotus node is from being fully synced.
// It caches the last observed chain height and sync progress so that they
// can be read by callers and by the metric observers without a node call.
type SyncMonitor struct {
	// cb creates a Lotus API client together with its close function.
	cb ClientBuilder

	// lock guards height, heightDiff and remaining.
	lock sync.Mutex
	// height is the chain head height stored by the last successful
	// refreshHeightMetric call.
	height int64
	// heightDiff is the largest target-minus-base height gap among the
	// unfinished active syncs; remaining is the target height minus the
	// current sync height of that same sync.
	heightDiff, remaining int64
}
// NewSyncMonitor creates a SyncMonitor that uses cb to reach the Lotus node.
//
// It initializes the monitor metrics and performs one synchronous refresh of
// the sync height difference; if that first refresh fails, the wrapped error
// is returned and no monitor is created. On success it starts a background
// goroutine that calls evaluate and then sleeps for metricHeightInterval, in
// an endless loop. That goroutine has no stop mechanism and runs for the
// remaining lifetime of the process.
func NewSyncMonitor(cb ClientBuilder) (*SyncMonitor, error) {
	lsm := &SyncMonitor{cb: cb}
	lsm.initMetrics()
	if err := lsm.refreshSyncDiff(); err != nil {
		// %w keeps the cause inspectable through errors.Is / errors.As.
		return nil, fmt.Errorf("getting initial sync height diff: %w", err)
	}
	go func() {
		for {
			lsm.evaluate()
			time.Sleep(metricHeightInterval)
		}
	}()
	return lsm, nil
}
// SyncHeightDiff reports the most recently recorded height difference
// between the tip of the chain and the current synced height. The cached
// value is read while holding the monitor lock.
func (lsm *SyncMonitor) SyncHeightDiff() int64 {
	lsm.lock.Lock()
	diff := lsm.heightDiff
	lsm.lock.Unlock()
	return diff
}
// evaluate runs one monitoring round: it refreshes the height metric and
// then checks the sync status. A failure in either step is logged and does
// not prevent the other step from running.
func (lsm *SyncMonitor) evaluate() {
	err := lsm.refreshHeightMetric()
	if err != nil {
		log.Errorf("refreshing height metric: %s", err)
	}

	err = lsm.checkSyncStatus()
	if err != nil {
		log.Errorf("checking sync status: %s", err)
	}
}
// refreshHeightMetric fetches the current chain head from the Lotus node and
// stores its height in lsm.height under the monitor lock.
//
// It returns a wrapped error when the client cannot be created or the chain
// head cannot be fetched; lsm.height is left untouched in that case. The
// client is always closed before returning.
func (lsm *SyncMonitor) refreshHeightMetric() error {
	c, closeClient, err := lsm.cb(context.Background())
	if err != nil {
		return fmt.Errorf("creating lotus client for monitoring: %w", err)
	}
	defer closeClient()

	// Bound the RPC so an unresponsive node cannot block the monitoring loop
	// forever; this is the same 5s budget refreshSyncDiff gives SyncState.
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
	defer cancel()

	heaviest, err := c.ChainHead(ctx)
	if err != nil {
		return fmt.Errorf("getting chain head: %w", err)
	}

	lsm.lock.Lock()
	lsm.height = int64(heaviest.Height())
	lsm.lock.Unlock()
	return nil
}
// checkSyncStatus refreshes the cached sync progress and logs a warning when
// the node is lagging behind.
//
// It returns a wrapped error if the refresh fails. Otherwise it returns nil,
// after logging a warning when the cached height difference is greater than
// 10.
func (lsm *SyncMonitor) checkSyncStatus() error {
	// Height differences up to this value do not trigger a warning.
	const maxQuietHeightDiff = 10

	if err := lsm.refreshSyncDiff(); err != nil {
		return fmt.Errorf("refreshing height difference: %w", err)
	}

	// Snapshot under the lock so the log call happens without holding it.
	lsm.lock.Lock()
	heightDiff, remaining := lsm.heightDiff, lsm.remaining
	lsm.lock.Unlock()

	if heightDiff > maxQuietHeightDiff {
		log.Warnf("Lotus behind in syncing with height diff %d, todo: %d", heightDiff, remaining)
	}
	return nil
}
// refreshSyncDiff queries the node's sync state and updates the cached sync
// progress (lsm.heightDiff and lsm.remaining) under the monitor lock.
//
// Among the active syncs that have a target and are not in the
// api.StageSyncComplete stage, it selects the one with the largest height
// gap, computed as target height minus base height (the base counts as 0
// when it is nil). heightDiff becomes that gap and remaining becomes the
// target height minus the current height of that sync. Both are set to 0
// when no sync has a positive gap.
//
// It returns a wrapped error, leaving the cached values untouched, when the
// client cannot be created or the SyncState call fails. The SyncState call
// is limited to 5 seconds.
func (lsm *SyncMonitor) refreshSyncDiff() error {
	c, closeClient, err := lsm.cb(context.Background())
	if err != nil {
		return fmt.Errorf("creating lotus client: %w", err)
	}
	defer closeClient()

	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
	defer cancel()

	ss, err := c.SyncState(ctx)
	if err != nil {
		return fmt.Errorf("calling sync state: %w", err)
	}

	var maxHeightDiff, remaining int64
	for _, active := range ss.ActiveSyncs {
		// Finished syncs and syncs without a target contribute no gap.
		if active.Stage == api.StageSyncComplete || active.Target == nil {
			continue
		}
		heightDiff := int64(active.Target.Height())
		if active.Base != nil {
			heightDiff -= int64(active.Base.Height())
		}
		if heightDiff > maxHeightDiff {
			maxHeightDiff = heightDiff
			remaining = int64(active.Target.Height() - active.Height)
		}
	}

	lsm.lock.Lock()
	lsm.heightDiff = maxHeightDiff
	lsm.remaining = remaining
	lsm.lock.Unlock()
	return nil
}
// initMetrics registers two int64 value observers on the "powergate" meter:
// "powergate.lotus.height" reports lsm.height and
// "powergate.lotus.height.diff" reports lsm.heightDiff. Each callback reads
// its field while holding the monitor lock.
//
// NOTE(review): the instruments are created through metric.Must, which
// presumably panics on a registration error instead of returning it; confirm
// against the OpenTelemetry version in use.
func (lsm *SyncMonitor) initMetrics() {
	must := metric.Must(global.Meter("powergate"))

	observeHeight := func(_ context.Context, res metric.Int64ObserverResult) {
		lsm.lock.Lock()
		defer lsm.lock.Unlock()
		res.Observe(lsm.height)
	}
	_ = must.NewInt64ValueObserver(
		"powergate.lotus.height",
		observeHeight,
		metric.WithDescription("Lotus node height"),
	)

	observeHeightDiff := func(_ context.Context, res metric.Int64ObserverResult) {
		lsm.lock.Lock()
		defer lsm.lock.Unlock()
		res.Observe(lsm.heightDiff)
	}
	_ = must.NewInt64ValueObserver(
		"powergate.lotus.height.diff",
		observeHeightDiff,
		metric.WithDescription("Lotus node height syncing diff"),
	)
}