New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve wait for activation #13448
Improve wait for activation #13448
Changes from 5 commits
723ad4b
c81fb14
72d6962
c0a4afd
f79b411
d8b3be4
5d141df
9435b40
58eab7b
e9d6615
4bf7cf9
d5326f5
a991302
a587681
fc5180b
02672d5
e3e63c6
580cd9b
12186e3
1def8b8
959e4a4
fd5cab0
cf1e5e4
90eec05
4fd773e
3ac8251
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -5,17 +5,16 @@ import ( | |||
"io" | ||||
"time" | ||||
|
||||
validator2 "github.com/prysmaticlabs/prysm/v4/consensus-types/validator" | ||||
"github.com/prysmaticlabs/prysm/v4/validator/client/iface" | ||||
|
||||
"github.com/pkg/errors" | ||||
fieldparams "github.com/prysmaticlabs/prysm/v4/config/fieldparams" | ||||
"github.com/prysmaticlabs/prysm/v4/config/params" | ||||
validator2 "github.com/prysmaticlabs/prysm/v4/consensus-types/validator" | ||||
"github.com/prysmaticlabs/prysm/v4/encoding/bytesutil" | ||||
"github.com/prysmaticlabs/prysm/v4/math" | ||||
"github.com/prysmaticlabs/prysm/v4/monitoring/tracing" | ||||
ethpb "github.com/prysmaticlabs/prysm/v4/proto/prysm/v1alpha1" | ||||
"github.com/prysmaticlabs/prysm/v4/time/slots" | ||||
"github.com/prysmaticlabs/prysm/v4/validator/client/iface" | ||||
"go.opencensus.io/trace" | ||||
) | ||||
|
||||
|
@@ -33,18 +32,18 @@ func (v *validator) WaitForActivation(ctx context.Context, accountsChangedChan c | |||
if err != nil { | ||||
return err | ||||
} | ||||
// subscribe to the channel if it's the first time | ||||
sub := km.SubscribeAccountChanges(accountsChangedChan) | ||||
defer func() { | ||||
sub.Unsubscribe() | ||||
close(accountsChangedChan) | ||||
}() | ||||
} | ||||
|
||||
return v.internalWaitForActivation(ctx, accountsChangedChan) | ||||
} | ||||
|
||||
// internalWaitForActivation performs the following: | ||||
// 1) While the key manager is empty, poll the key manager until some validator keys exist. | ||||
// 1) While the key manager is empty, subscribe to keymanager changes until some validator keys exist. | ||||
// 2) Open a server side stream for activation events against the given keys. | ||||
// 3) In another go routine, the key manager is monitored for updates and emits an update event on | ||||
// the accountsChangedChan. When an event signal is received, restart the internalWaitForActivation routine. | ||||
|
@@ -53,65 +52,41 @@ func (v *validator) WaitForActivation(ctx context.Context, accountsChangedChan c | |||
func (v *validator) internalWaitForActivation(ctx context.Context, accountsChangedChan <-chan [][fieldparams.BLSPubkeyLength]byte) error { | ||||
ctx, span := trace.StartSpan(ctx, "validator.WaitForActivation") | ||||
defer span.End() | ||||
|
||||
validatingKeys, err := v.keyManager.FetchValidatingPublicKeys(ctx) | ||||
if err != nil { | ||||
return errors.Wrap(err, "could not fetch validating keys") | ||||
return errors.Wrap(err, msgCouldNotFetchKeys) | ||||
} | ||||
if len(validatingKeys) == 0 { | ||||
log.Warn(msgNoKeysFetched) | ||||
|
||||
ticker := time.NewTicker(keyRefetchPeriod) | ||||
defer ticker.Stop() | ||||
for { | ||||
select { | ||||
case <-ticker.C: | ||||
validatingKeys, err = v.keyManager.FetchValidatingPublicKeys(ctx) | ||||
if err != nil { | ||||
return errors.Wrap(err, msgCouldNotFetchKeys) | ||||
} | ||||
if len(validatingKeys) == 0 { | ||||
log.Warn(msgNoKeysFetched) | ||||
continue | ||||
} | ||||
case <-ctx.Done(): | ||||
log.Debug("Context closed, exiting fetching validating keys") | ||||
return ctx.Err() | ||||
} | ||||
break | ||||
} | ||||
} | ||||
|
||||
req := &ethpb.ValidatorActivationRequest{ | ||||
PublicKeys: bytesutil.FromBytes48Array(validatingKeys), | ||||
} | ||||
stream, err := v.validatorClient.WaitForActivation(ctx, req) | ||||
if err != nil { | ||||
tracing.AnnotateError(span, err) | ||||
attempts := streamAttempts(ctx) | ||||
log.WithError(err).WithField("attempts", attempts). | ||||
Error("Stream broken while waiting for activation. Reconnecting...") | ||||
// Reconnection attempt backoff, up to 60s. | ||||
time.Sleep(time.Second * time.Duration(math.Min(uint64(attempts), 60))) | ||||
return v.internalWaitForActivation(incrementRetries(ctx), accountsChangedChan) | ||||
} | ||||
|
||||
if err = v.handleAccountsChanged(ctx, accountsChangedChan, &stream, span); err != nil { | ||||
return err | ||||
} | ||||
|
||||
v.ticker = slots.NewSlotTicker(time.Unix(int64(v.genesisTime), 0), params.BeaconConfig().SecondsPerSlot) | ||||
return nil | ||||
} | ||||
|
||||
func (v *validator) handleAccountsChanged(ctx context.Context, accountsChangedChan <-chan [][fieldparams.BLSPubkeyLength]byte, stream *ethpb.BeaconNodeValidator_WaitForActivationClient, span *trace.Span) error { | ||||
// loop while there are no validator keys ... | ||||
for { | ||||
select { | ||||
case <-ctx.Done(): | ||||
log.Debug("Context closed, exiting fetching validating keys") | ||||
return ctx.Err() | ||||
case <-accountsChangedChan: | ||||
// Accounts (keys) changed, restart the process. | ||||
// if the accounts changed try it again | ||||
return v.internalWaitForActivation(ctx, accountsChangedChan) | ||||
default: | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This default case doesn't do anything, we can remove it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this actually improved the efficiency as I believe golang efficiently handles the loop if this is the case |
||||
res, err := (*stream).Recv() | ||||
if len(validatingKeys) == 0 { | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. when the account changes the function is recursive. my understanding is that this loop only really needs to run if you have 0 active keys anyways. once you have 1 active key it exits and proceeds and subsequent key changes are handled in the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're right, I missed the recursivity. |
||||
continue | ||||
} | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we need maybe some small sleep here? we are here in the endless for loop and if channels won't be filled for some longer period I think we can experience some CPU spike here? |
||||
stream, err := v.validatorClient.WaitForActivation(ctx, &ethpb.ValidatorActivationRequest{ | ||||
PublicKeys: bytesutil.FromBytes48Array(validatingKeys), | ||||
}) | ||||
if err != nil { | ||||
tracing.AnnotateError(span, err) | ||||
attempts := streamAttempts(ctx) | ||||
log.WithError(err).WithField("attempts", attempts). | ||||
Error("Stream broken while waiting for activation. Reconnecting...") | ||||
// Reconnection attempt backoff, up to 60s. | ||||
time.Sleep(time.Second * time.Duration(math.Min(uint64(attempts), 60))) | ||||
return v.internalWaitForActivation(incrementRetries(ctx), accountsChangedChan) | ||||
} | ||||
|
||||
// Recv polls for validator statuses | ||||
res, err := stream.Recv() | ||||
// If the stream is closed, we stop the loop. | ||||
if errors.Is(err, io.EOF) { | ||||
break | ||||
|
@@ -150,15 +125,16 @@ func (v *validator) handleAccountsChanged(ctx context.Context, accountsChangedCh | |||
valCount = int64(valCounts[0].Count) | ||||
} | ||||
|
||||
valActivated := v.checkAndLogValidatorStatus(statuses, valCount) | ||||
if valActivated { | ||||
logActiveValidatorStatus(statuses) | ||||
} else { | ||||
if !v.checkAndLogValidatorStatus(statuses, valCount) { | ||||
continue | ||||
} | ||||
} | ||||
// If a validator is active, break out of this loop | ||||
break | ||||
} | ||||
|
||||
// reset the ticker when they are all active | ||||
v.ticker = slots.NewSlotTicker(time.Unix(int64(v.genesisTime), 0), params.BeaconConfig().SecondsPerSlot) | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the point of this code? The ticker is already set here: prysm/validator/client/validator.go Line 282 in 2875ce6
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I removed it; I don't think it has a use case if it's set elsewhere. I wish it were clearer where it was set, however. |
||||
return nil | ||||
} | ||||
|
||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe separate the two cases
ethpb.ValidatorStatus_ACTIVE
and ethpb.ValidatorStatus_EXITING
to avoid the `if` in the switch/case?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
probably makes more sense just not having the if statement actually. activated should mean that it's using the account