-
Notifications
You must be signed in to change notification settings - Fork 390
/
estimation_chore.go
86 lines (74 loc) · 2.19 KB
/
estimation_chore.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.
package downtime
import (
"context"
"time"
"go.uber.org/zap"
"storj.io/common/sync2"
"storj.io/storj/satellite/overlay"
)
// EstimationChore is the periodic job that estimates how long offline nodes
// have been unreachable.
//
// architecture: Chore
type EstimationChore struct {
	// log receives the chore's debug and error output.
	log *zap.Logger
	// Loop is the cycle that drives Run; the constructor creates it with
	// config.EstimationInterval.
	Loop *sync2.Cycle
	// config supplies the estimation interval and batch size read by Run.
	config Config
	// startTime is the UTC instant at which the chore was constructed. Run only
	// records downtime for contact failures that happened after it.
	startTime time.Time
	// overlay is queried for the current batch of offline nodes.
	overlay *overlay.Service
	// service performs the availability check for each offline node.
	service *Service
	// db stores the measured offline durations.
	db DB
}
// NewEstimationChore builds an EstimationChore from its dependencies.
//
// The chore's Loop is created with config.EstimationInterval, and the current
// UTC time is stored as the chore's start time.
func NewEstimationChore(log *zap.Logger, config Config, overlay *overlay.Service, service *Service, db DB) *EstimationChore {
	chore := new(EstimationChore)

	chore.log = log
	chore.Loop = sync2.NewCycle(config.EstimationInterval)
	chore.config = config
	chore.startTime = time.Now().UTC()
	chore.overlay = overlay
	chore.service = service
	chore.db = db

	return chore
}
// Run starts the chore by handing the per-cycle work to Loop.Run and returns
// whatever Loop.Run returns.
//
// On each cycle it asks the overlay for up to config.EstimationBatchSize
// offline nodes and checks the availability of each one through the downtime
// service. For a node that is still unavailable and whose last contact failure
// happened after the chore was constructed, the time elapsed since that failure
// is stored in the database.
//
// Errors inside a cycle are logged and never returned from the cycle function,
// so one failing node or one failing cycle does not end the loop.
func (chore *EstimationChore) Run(ctx context.Context) (err error) {
	defer mon.Task()(&ctx)(&err)

	return chore.Loop.Run(ctx, func(ctx context.Context) (err error) {
		defer mon.Task()(&ctx)(&err)

		chore.log.Debug("checking uptime of failed nodes",
			zap.Stringer("interval", chore.config.EstimationInterval))

		offlineNodes, err := chore.overlay.GetOfflineNodesLimited(ctx, chore.config.EstimationBatchSize)
		if err != nil {
			// Best effort: log the failure and let the next cycle try again.
			chore.log.Error("error getting offline nodes", zap.Error(err))
			return nil
		}

		for _, node := range offlineNodes {
			// Once the context is done, stop instead of attempting a ping-back
			// (and logging an error) for every remaining node in the batch.
			// nil is returned to keep the rule that a cycle never fails the
			// loop; reporting the cancellation is left to Loop.Run.
			if ctx.Err() != nil {
				return nil
			}

			success, err := chore.service.CheckAndUpdateNodeAvailability(ctx, node.ID, node.Address)
			if err != nil {
				// Include the node ID so the failure can be traced to a node.
				chore.log.Error("error during downtime estimation ping back",
					zap.Stringer("node ID", node.ID),
					zap.Bool("success", success),
					zap.Error(err))
				continue
			}

			// Only failures that began after this chore was constructed are
			// measured; earlier ones are skipped.
			if !success && node.LastContactFailure.After(chore.startTime) {
				now := time.Now().UTC()
				duration := now.Sub(node.LastContactFailure)

				err = chore.db.Add(ctx, node.ID, now, duration)
				if err != nil {
					chore.log.Error("error adding node seconds offline information.",
						zap.Stringer("node ID", node.ID),
						zap.Stringer("duration", duration),
						zap.Error(err))
				}
			}
		}
		return nil
	})
}
// Close closes the chore's Loop. It always returns nil.
func (chore *EstimationChore) Close() error {
	chore.Loop.Close()

	return nil
}