-
Notifications
You must be signed in to change notification settings - Fork 212
/
sync.go
303 lines (272 loc) · 7.62 KB
/
sync.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
package peersync
import (
"context"
"errors"
"fmt"
"sync"
"sync/atomic"
"time"
"github.com/libp2p/go-libp2p/core/host"
"github.com/libp2p/go-libp2p/core/network"
"golang.org/x/sync/errgroup"
"github.com/spacemeshos/go-spacemesh/codec"
"github.com/spacemeshos/go-spacemesh/log"
"github.com/spacemeshos/go-spacemesh/p2p"
)
const (
// protocolName is the protocol ID under which the time sync stream handler
// is registered on the host and under which streams to peers are opened.
protocolName = "/peersync/1.0/"
)
var (
// ErrPeersNotSynced is returned by the background worker when the clock offset
// measured against peers exceeded MaxClockOffset in MaxOffsetErrors rounds
// without an in-range measurement in between.
ErrPeersNotSynced = errors.New("timesync: peers are not time synced")
// ErrTimesyncFailed is returned by GetOffset when not enough clock samples
// were collected from peers to compute an offset.
ErrTimesyncFailed = errors.New("timesync: failed request")
)
//go:generate mockgen -package=mocks -destination=./mocks/mocks.go -source=./sync.go
// Time is the source of the current time used by Sync.
// It exists so that the clock can be replaced, see WithTime.
type Time interface {
// Now returns the current time.
Now() time.Time
}
// systemTime is a Time implementation backed by the system clock.
type systemTime struct{}

// Now reports the current time as given by time.Now.
func (systemTime) Now() time.Time {
	return time.Now()
}
// getPeers provides the set of peers that Sync queries for their clock.
type getPeers interface {
// GetPeers returns the peers that may be queried in a sync round.
GetPeers() []p2p.Peer
}
//go:generate scalegen -types Request,Response
// Request is a sync request sent to a peer over the peersync protocol.
type Request struct {
// ID identifies the request; the responder copies it into Response.ID.
ID uint64
}
// Response is a sync response carrying the responder's clock reading.
type Response struct {
// ID is copied from the Request that this response answers.
ID uint64
// Timestamp is the responder's current time in Unix nanoseconds.
Timestamp uint64
}
// DefaultConfig returns the default settings for Sync.
// Disable is left at its zero value (false).
func DefaultConfig() Config {
	var cfg Config
	cfg.RoundRetryInterval = 5 * time.Second
	cfg.RoundInterval = 30 * time.Minute
	cfg.RoundTimeout = 5 * time.Second
	cfg.MaxClockOffset = 10 * time.Second
	cfg.MaxOffsetErrors = 10
	cfg.RequiredResponses = 3
	return cfg
}
// Config holds the settings of Sync.
type Config struct {
// Disable is not read in this file; presumably the caller uses it to skip
// starting Sync — TODO confirm against the caller.
Disable bool `mapstructure:"disable"`
// RoundRetryInterval is the wait before the next attempt when a round did
// not produce an offset (too few peers, or collecting responses failed).
RoundRetryInterval time.Duration `mapstructure:"round-retry-interval"`
// RoundInterval is the wait before the next round after an offset was computed.
RoundInterval time.Duration `mapstructure:"round-interval"`
// RoundTimeout bounds a single round and is also used as the deadline
// set on every stream.
RoundTimeout time.Duration `mapstructure:"round-timeout"`
// MaxClockOffset is the largest absolute offset that is still considered in sync.
MaxClockOffset time.Duration `mapstructure:"max-clock-offset"`
// MaxOffsetErrors is the number of out-of-range rounds (without an in-range
// round in between) after which the worker exits with ErrPeersNotSynced.
MaxOffsetErrors int `mapstructure:"max-offset-errors"`
// RequiredResponses is the minimum number of peers needed to start a round;
// it is also passed to the round that aggregates the responses.
RequiredResponses int `mapstructure:"required-responses"`
}
// Option is a functional option that modifies a Sync instance.
// Options are applied by New after the defaults are set.
type Option func(*Sync)
// WithTime returns an Option that replaces the source of time used by Sync.
func WithTime(t Time) Option {
	apply := func(target *Sync) {
		target.time = t
	}
	return apply
}
// WithContext returns an Option that replaces the parent context used for
// all operations in Sync.
func WithContext(ctx context.Context) Option {
	apply := func(target *Sync) {
		target.ctx = ctx
	}
	return apply
}
// WithLog returns an Option that replaces the logger used by Sync.
func WithLog(lg log.Log) Option {
	apply := func(target *Sync) {
		target.log = lg
	}
	return apply
}
// WithConfig returns an Option that replaces the whole configuration of Sync.
func WithConfig(config Config) Option {
	apply := func(target *Sync) {
		target.config = config
	}
	return apply
}
// New creates a Sync instance with default settings, applies opts in order,
// and registers the peersync stream handler on h.
//
// The context in effect after the options ran is wrapped in a cancellable
// context; Stop cancels it.
func New(h host.Host, peers getPeers, opts ...Option) *Sync {
	s := new(Sync)
	s.log = log.NewNop()
	s.ctx = context.Background()
	s.time = systemTime{}
	s.h = h
	s.config = DefaultConfig()
	s.peers = peers

	for i := range opts {
		opts[i](s)
	}

	s.ctx, s.cancel = context.WithCancel(s.ctx)
	h.SetStreamHandler(protocolName, s.streamHandler)
	return s
}
// Sync manages a background worker that compares peers' time with the system time.
type Sync struct {
// errCnt counts rounds whose offset exceeded MaxClockOffset since the last
// in-range round. It is accessed with sync/atomic.
errCnt uint32
// config holds the settings; defaults come from DefaultConfig.
config Config
log log.Log
// time is the clock used for timestamps and stream deadlines.
time Time
// h is the host used to open streams to peers.
h host.Host
// peers supplies the peers queried in each round.
peers getPeers
// eg tracks the background worker started by Start.
eg errgroup.Group
// ctx is the cancellable context created in New; cancel cancels it.
ctx context.Context
cancel func()
}
// streamHandler serves a single time sync request: it decodes a Request from
// the stream and replies with a Response that echoes the request ID and
// carries the local time in Unix nanoseconds.
//
// The whole exchange is bounded by RoundTimeout. Failures are logged and the
// stream is closed in every case.
func (s *Sync) streamHandler(stream network.Stream) {
	defer stream.Close()

	deadline := s.time.Now().Add(s.config.RoundTimeout)
	_ = stream.SetDeadline(deadline)
	// Clear the deadline before the stream is closed.
	defer stream.SetDeadline(time.Time{})

	var req Request
	_, err := codec.DecodeFrom(stream, &req)
	if err != nil {
		s.log.With().Warning("can't decode request", log.Err(err))
		return
	}

	reply := Response{ID: req.ID}
	reply.Timestamp = uint64(s.time.Now().UnixNano())
	_, err = codec.EncodeTo(stream, &reply)
	if err == nil {
		return
	}
	s.log.With().Warning("can't encode response", log.Err(err))
}
// Start launches the background worker in the Sync's errgroup.
// It returns immediately; use Wait or Stop to wait for the worker.
func (s *Sync) Start() {
	s.eg.Go(s.run)
}
// Stop cancels the Sync context and blocks until the background workers exit.
func (s *Sync) Stop() {
	s.cancel()
	// The result of Wait is intentionally discarded.
	_ = s.Wait()
}
// Wait blocks until the background workers exit and returns the first error
// returned by any of them, wrapped with a "taskgroup" prefix.
//
// It returns nil when no worker failed (including when no worker was ever
// started) and when the worker stopped because the context was cancelled.
func (s *Sync) Wait() error {
	err := s.eg.Wait()
	// A nil error must be returned as nil: wrapping it with %w would produce
	// a non-nil error and report a failure that never happened.
	if err == nil || errors.Is(err, context.Canceled) {
		return nil
	}
	return fmt.Errorf("taskgroup: %w", err)
}
// run is the background worker loop. In every iteration it fetches the
// current peers and, if there are at least RequiredResponses of them, runs a
// sync round bounded by RoundTimeout.
//
// After a round that produced an offset the loop sleeps for RoundInterval;
// otherwise it sleeps for RoundRetryInterval. An out-of-range offset
// increments the error counter and an in-range offset resets it.
//
// It returns a clockError wrapping ErrPeersNotSynced once the counter reaches
// MaxOffsetErrors, or an error wrapping the context error when the Sync
// context is done. It never returns nil.
func (s *Sync) run() error {
	var (
		timer *time.Timer
		round uint64
	)
	// Stop the timer on every exit path so that a pending timer (up to
	// RoundInterval long) does not outlive the worker.
	defer func() {
		if timer != nil {
			timer.Stop()
		}
	}()

	s.log.With().Debug("started sync background worker")
	defer s.log.With().Debug("exiting sync background worker")
	for {
		prs := s.peers.GetPeers()
		// Retry soon unless this iteration produces an offset.
		timeout := s.config.RoundRetryInterval
		if len(prs) >= s.config.RequiredResponses {
			s.log.With().Debug("starting time sync round with peers",
				log.Uint64("round", round),
				log.Int("peers_count", len(prs)),
				log.Uint32("errors_count", atomic.LoadUint32(&s.errCnt)),
			)
			ctx, cancel := context.WithTimeout(s.ctx, s.config.RoundTimeout)
			offset, err := s.GetOffset(ctx, round, prs)
			cancel()
			if err == nil {
				// Compare the absolute value of the offset with the limit.
				if offset > s.config.MaxClockOffset || (offset < 0 && -offset > s.config.MaxClockOffset) {
					s.log.With().Warning("peers offset is larger than max allowed clock difference",
						log.Uint64("round", round),
						log.Duration("offset", offset),
						log.Duration("max_offset", s.config.MaxClockOffset),
					)
					if atomic.AddUint32(&s.errCnt, 1) == uint32(s.config.MaxOffsetErrors) {
						return clockError{
							err:     ErrPeersNotSynced,
							details: clockErrorDetails{Drift: offset},
						}
					}
				} else {
					s.log.With().Debug("peers offset is within max allowed clock difference",
						log.Uint64("round", round),
						log.Duration("offset", offset),
						log.Duration("max_offset", s.config.MaxClockOffset),
					)
					atomic.StoreUint32(&s.errCnt, 0)
				}
				offsetGauge.Set(offset.Seconds())
				timeout = s.config.RoundInterval
			} else {
				s.log.With().Error("failed to fetch offset from peers", log.Err(err))
			}
			round++
		}
		if timer == nil {
			timer = time.NewTimer(timeout)
		} else {
			// The previous expiry was received below before looping, so the
			// channel is empty and Reset can be called directly.
			timer.Reset(timeout)
		}
		select {
		case <-s.ctx.Done():
			return fmt.Errorf("context done: %w", s.ctx.Err())
		case <-timer.C:
		}
	}
}
// GetOffset queries every peer in prs concurrently for its clock and computes
// the offset from the collected responses.
//
// A Request with the given id is sent to each peer over an already existing
// connection (no dialing). Peers that fail at any step are logged and
// skipped. The call returns after all per-peer exchanges have finished; each
// of them is bounded by ctx and by a RoundTimeout stream deadline.
//
// It returns an error wrapping ErrTimesyncFailed when the round is not ready
// after all responses were added. The method does not modify Sync state and
// is safe to use concurrently.
func (s *Sync) GetOffset(ctx context.Context, id uint64, prs []p2p.Peer) (time.Duration, error) {
	// Buffered for one response per peer, so senders never block on it.
	results := make(chan Response, len(prs))
	current := round{
		ID:                id,
		Timestamp:         s.time.Now().UnixNano(),
		RequiredResponses: s.config.RequiredResponses,
	}

	payload, err := codec.Encode(&Request{ID: id})
	if err != nil {
		s.log.With().Panic("can't encode request to bytes", log.Err(err))
	}

	// query performs the request/response exchange with a single peer.
	query := func(peer p2p.Peer) {
		peerLog := s.log.WithFields(log.String("pid", peer.Pretty())).With()

		stream, err := s.h.NewStream(network.WithNoDial(ctx, "existing connection"), peer, protocolName)
		if err != nil {
			peerLog.Warning("failed to create new stream", log.Err(err))
			return
		}
		defer stream.Close()

		_ = stream.SetDeadline(s.time.Now().Add(s.config.RoundTimeout))
		defer stream.SetDeadline(time.Time{})

		if _, err := stream.Write(payload); err != nil {
			peerLog.Warning("failed to send a request", log.Err(err))
			return
		}

		var reply Response
		if _, err := codec.DecodeFrom(stream, &reply); err != nil {
			peerLog.Warning("failed to read response from peer", log.Err(err))
			return
		}

		select {
		case <-ctx.Done():
		case results <- reply:
		}
	}

	var pending sync.WaitGroup
	pending.Add(len(prs))
	for _, peer := range prs {
		go func(peer p2p.Peer) {
			defer pending.Done()
			query(peer)
		}(peer)
	}

	// Close the channel once every exchange is over to end the loop below.
	go func() {
		pending.Wait()
		close(results)
	}()

	for {
		reply, ok := <-results
		if !ok {
			break
		}
		current.AddResponse(reply, s.time.Now().UnixNano())
	}

	if !current.Ready() {
		return 0, fmt.Errorf("%w: failed on timeout", ErrTimesyncFailed)
	}
	return current.Offset(), nil
}