-
Notifications
You must be signed in to change notification settings - Fork 946
/
runner.go
315 lines (282 loc) · 10 KB
/
runner.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
package client
import (
"context"
"fmt"
"sync"
"time"
"github.com/pkg/errors"
"github.com/prysmaticlabs/prysm/v5/api/client"
"github.com/prysmaticlabs/prysm/v5/api/client/event"
fieldparams "github.com/prysmaticlabs/prysm/v5/config/fieldparams"
"github.com/prysmaticlabs/prysm/v5/config/params"
"github.com/prysmaticlabs/prysm/v5/consensus-types/primitives"
"github.com/prysmaticlabs/prysm/v5/encoding/bytesutil"
"github.com/prysmaticlabs/prysm/v5/time/slots"
"github.com/prysmaticlabs/prysm/v5/validator/client/iface"
"go.opencensus.io/trace"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
// Time to wait before trying to reconnect with beacon node.
var backOffPeriod = 10 * time.Second
// Run the main validator routine. This routine exits if the context is
// canceled.
//
// Order of operations:
// 1 - Initialize validator data
// 2 - Wait for validator activation
// 3 - Wait for the next slot start
// 4 - Update assignments
// 5 - Determine role at current slot
// 6 - Perform assigned role, if any
func run(ctx context.Context, v iface.Validator) {
cleanup := v.Done
defer cleanup()
headSlot, err := initializeValidatorAndGetHeadSlot(ctx, v)
if err != nil {
return // Exit if context is canceled.
}
if err := v.UpdateDuties(ctx, headSlot); err != nil {
handleAssignmentError(err, headSlot)
}
eventsChan := make(chan *event.Event, 1)
healthTracker := v.HealthTracker()
runHealthCheckRoutine(ctx, v, eventsChan)
accountsChangedChan := make(chan [][fieldparams.BLSPubkeyLength]byte, 1)
km, err := v.Keymanager()
if err != nil {
log.WithError(err).Fatal("Could not get keymanager")
}
sub := km.SubscribeAccountChanges(accountsChangedChan)
// check if proposer settings is still nil
// Set properties on the beacon node like the fee recipient for validators that are being used & active.
if v.ProposerSettings() == nil {
log.Warn("Validator client started without proposer settings such as fee recipient" +
" and will continue to use settings provided in the beacon node.")
}
deadline := time.Now().Add(5 * time.Minute)
if err := v.PushProposerSettings(ctx, km, headSlot, deadline); err != nil {
if errors.Is(err, ErrBuilderValidatorRegistration) {
log.WithError(err).Warn("Push proposer settings error")
} else {
log.WithError(err).Fatal("Failed to update proposer settings") // allow fatal. skipcq
}
}
for {
ctx, span := trace.StartSpan(ctx, "validator.processSlot")
select {
case <-ctx.Done():
log.Info("Context canceled, stopping validator")
span.End()
sub.Unsubscribe()
close(accountsChangedChan)
return // Exit if context is canceled.
case slot := <-v.NextSlot():
if !healthTracker.IsHealthy() {
continue
}
span.AddAttributes(trace.Int64Attribute("slot", int64(slot))) // lint:ignore uintcast -- This conversion is OK for tracing.
deadline := v.SlotDeadline(slot)
slotCtx, cancel := context.WithDeadline(ctx, deadline)
log := log.WithField("slot", slot)
log.WithField("deadline", deadline).Debug("Set deadline for proposals and attestations")
// Keep trying to update assignments if they are nil or if we are past an
// epoch transition in the beacon node's state.
if err := v.UpdateDuties(ctx, slot); err != nil {
handleAssignmentError(err, slot)
cancel()
span.End()
continue
}
// call push proposer setting at the start of each epoch to account for the following edge case:
// proposer is activated at the start of epoch and tries to propose immediately
if slots.IsEpochStart(slot) {
go func() {
// deadline set for 1 epoch from call to not overlap.
epochDeadline := v.SlotDeadline(slot + params.BeaconConfig().SlotsPerEpoch - 1)
if err := v.PushProposerSettings(ctx, km, slot, epochDeadline); err != nil {
log.WithError(err).Warn("Failed to update proposer settings")
}
}()
}
// Start fetching domain data for the next epoch.
if slots.IsEpochEnd(slot) {
go v.UpdateDomainDataCaches(ctx, slot+1)
}
var wg sync.WaitGroup
allRoles, err := v.RolesAt(ctx, slot)
if err != nil {
log.WithError(err).Error("Could not get validator roles")
cancel()
span.End()
continue
}
performRoles(slotCtx, allRoles, v, slot, &wg, span)
case isHealthyAgain := <-healthTracker.HealthUpdates():
if isHealthyAgain {
headSlot, err = initializeValidatorAndGetHeadSlot(ctx, v)
if err != nil {
log.WithError(err).Error("Failed to re initialize validator and get head slot")
continue
}
if err := v.UpdateDuties(ctx, headSlot); err != nil {
handleAssignmentError(err, headSlot)
continue
}
}
case e := <-eventsChan:
v.ProcessEvent(e)
case currentKeys := <-accountsChangedChan: // should be less of a priority than next slot
onAccountsChanged(ctx, v, currentKeys, accountsChangedChan)
}
}
}
func onAccountsChanged(ctx context.Context, v iface.Validator, current [][48]byte, ac chan [][fieldparams.BLSPubkeyLength]byte) {
anyActive, err := v.HandleKeyReload(ctx, current)
if err != nil {
log.WithError(err).Error("Could not properly handle reloaded keys")
}
if !anyActive {
log.Warn("No active keys found. Waiting for activation...")
err := v.WaitForActivation(ctx, ac)
if err != nil {
log.WithError(err).Warn("Could not wait for validator activation")
}
}
}
func initializeValidatorAndGetHeadSlot(ctx context.Context, v iface.Validator) (primitives.Slot, error) {
ticker := time.NewTicker(backOffPeriod)
defer ticker.Stop()
firstTime := true
var (
headSlot primitives.Slot
err error
)
for {
if !firstTime {
if ctx.Err() != nil {
log.Info("Context canceled, stopping validator")
return headSlot, errors.New("context canceled")
}
<-ticker.C
}
firstTime = false
if err := v.WaitForChainStart(ctx); err != nil {
if isConnectionError(err) {
log.WithError(err).Warn("Could not determine if beacon chain started")
continue
}
log.WithError(err).Fatal("Could not determine if beacon chain started")
}
if err := v.WaitForKeymanagerInitialization(ctx); err != nil {
// log.Fatal will prevent defer from being called
v.Done()
log.WithError(err).Fatal("Wallet is not ready")
}
if err := v.WaitForSync(ctx); err != nil {
if isConnectionError(err) {
log.WithError(err).Warn("Could not determine if beacon chain started")
continue
}
log.WithError(err).Fatal("Could not determine if beacon node synced")
}
if err := v.WaitForActivation(ctx, nil /* accountsChangedChan */); err != nil {
log.WithError(err).Fatal("Could not wait for validator activation")
}
headSlot, err = v.CanonicalHeadSlot(ctx)
if isConnectionError(err) {
log.WithError(err).Warn("Could not get current canonical head slot")
continue
}
if err != nil {
log.WithError(err).Fatal("Could not get current canonical head slot")
}
if err := v.CheckDoppelGanger(ctx); err != nil {
if isConnectionError(err) {
log.WithError(err).Warn("Could not wait for checking doppelganger")
continue
}
log.WithError(err).Fatal("Could not succeed with doppelganger check")
}
break
}
return headSlot, nil
}
func performRoles(slotCtx context.Context, allRoles map[[48]byte][]iface.ValidatorRole, v iface.Validator, slot primitives.Slot, wg *sync.WaitGroup, span *trace.Span) {
for pubKey, roles := range allRoles {
wg.Add(len(roles))
for _, role := range roles {
go func(role iface.ValidatorRole, pubKey [fieldparams.BLSPubkeyLength]byte) {
defer wg.Done()
switch role {
case iface.RoleAttester:
v.SubmitAttestation(slotCtx, slot, pubKey)
case iface.RoleProposer:
v.ProposeBlock(slotCtx, slot, pubKey)
case iface.RoleAggregator:
v.SubmitAggregateAndProof(slotCtx, slot, pubKey)
case iface.RoleSyncCommittee:
v.SubmitSyncCommitteeMessage(slotCtx, slot, pubKey)
case iface.RoleSyncCommitteeAggregator:
v.SubmitSignedContributionAndProof(slotCtx, slot, pubKey)
case iface.RoleUnknown:
log.WithField("pubkey", fmt.Sprintf("%#x", bytesutil.Trunc(pubKey[:]))).Trace("No active roles, doing nothing")
default:
log.Warnf("Unhandled role %v", role)
}
}(role, pubKey)
}
}
// Wait for all processes to complete, then report span complete.
go func() {
wg.Wait()
defer span.End()
defer func() {
if err := recover(); err != nil { // catch any panic in logging
log.WithField("error", err).
Error("Panic occurred when logging validator report. This" +
" should never happen! Please file a report at github.com/prysmaticlabs/prysm/issues/new")
}
}()
// Log performance in the previous slot
v.LogSubmittedAtts(slot)
v.LogSubmittedSyncCommitteeMessages()
if err := v.LogValidatorGainsAndLosses(slotCtx, slot); err != nil {
log.WithError(err).Error("Could not report validator's rewards/penalties")
}
}()
}
func isConnectionError(err error) bool {
return err != nil && errors.Is(err, client.ErrConnectionIssue)
}
func handleAssignmentError(err error, slot primitives.Slot) {
if errors.Is(err, ErrValidatorsAllExited) {
log.Warn(ErrValidatorsAllExited)
} else if errCode, ok := status.FromError(err); ok && errCode.Code() == codes.NotFound {
log.WithField(
"epoch", slot/params.BeaconConfig().SlotsPerEpoch,
).Warn("Validator not yet assigned to epoch")
} else {
log.WithError(err).Error("Failed to update assignments")
}
}
func runHealthCheckRoutine(ctx context.Context, v iface.Validator, eventsChan chan<- *event.Event) {
log.Info("Starting health check routine for beacon node apis")
healthCheckTicker := time.NewTicker(time.Duration(params.BeaconConfig().SecondsPerSlot) * time.Second)
tracker := v.HealthTracker()
go func() {
// trigger the healthcheck immediately the first time
for ; true; <-healthCheckTicker.C {
if ctx.Err() != nil {
log.WithError(ctx.Err()).Error("Context cancelled")
return
}
isHealthy := tracker.CheckHealth(ctx)
// in case of node returning healthy but event stream died
if isHealthy && !v.EventStreamIsRunning() {
log.Info("Event stream reconnecting...")
go v.StartEventStream(ctx, event.DefaultEventTopics, eventsChan)
}
}
}()
}