-
Notifications
You must be signed in to change notification settings - Fork 402
/
chore.go
138 lines (119 loc) · 3.79 KB
/
chore.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.
package gracefulexit
import (
"context"
"database/sql"
"time"
"github.com/zeebo/errs"
"go.uber.org/zap"
"storj.io/common/storj"
"storj.io/common/sync2"
"storj.io/storj/satellite/metainfo"
"storj.io/storj/satellite/overlay"
)
// Chore populates the graceful exit transfer queue.
//
// architecture: Chore
type Chore struct {
log *zap.Logger
Loop *sync2.Cycle
db DB
config Config
overlay overlay.DB
metainfoLoop *metainfo.Loop
}
// NewChore instantiates Chore.
func NewChore(log *zap.Logger, db DB, overlay overlay.DB, metaLoop *metainfo.Loop, config Config) *Chore {
return &Chore{
log: log,
Loop: sync2.NewCycle(config.ChoreInterval),
db: db,
config: config,
overlay: overlay,
metainfoLoop: metaLoop,
}
}
// Run starts the chore.
func (chore *Chore) Run(ctx context.Context) (err error) {
defer mon.Task()(&ctx)(&err)
return chore.Loop.Run(ctx, func(ctx context.Context) (err error) {
defer mon.Task()(&ctx)(&err)
exitingNodes, err := chore.overlay.GetExitingNodes(ctx)
if err != nil {
chore.log.Error("error retrieving nodes that have not finished exiting", zap.Error(err))
return nil
}
nodeCount := len(exitingNodes)
if nodeCount == 0 {
return nil
}
chore.log.Debug("found exiting nodes", zap.Int("exitingNodes", nodeCount))
exitingNodesLoopIncomplete := make(storj.NodeIDList, 0, nodeCount)
for _, node := range exitingNodes {
if node.ExitLoopCompletedAt == nil {
exitingNodesLoopIncomplete = append(exitingNodesLoopIncomplete, node.NodeID)
continue
}
progress, err := chore.db.GetProgress(ctx, node.NodeID)
if err != nil && !errs.Is(err, sql.ErrNoRows) {
chore.log.Error("error retrieving progress for node", zap.Stringer("Node ID", node.NodeID), zap.Error(err))
continue
}
lastActivityTime := *node.ExitLoopCompletedAt
if progress != nil {
lastActivityTime = progress.UpdatedAt
}
// check inactive timeframe
if lastActivityTime.Add(chore.config.MaxInactiveTimeFrame).Before(time.Now().UTC()) {
exitStatusRequest := &overlay.ExitStatusRequest{
NodeID: node.NodeID,
ExitSuccess: false,
ExitFinishedAt: time.Now().UTC(),
}
mon.Meter("graceful_exit_fail_inactive").Mark(1)
_, err = chore.overlay.UpdateExitStatus(ctx, exitStatusRequest)
if err != nil {
chore.log.Error("error updating exit status", zap.Error(err))
continue
}
// remove all items from the transfer queue
err := chore.db.DeleteTransferQueueItems(ctx, node.NodeID)
if err != nil {
chore.log.Error("error deleting node from transfer queue", zap.Error(err))
}
}
}
// Populate transfer queue for nodes that have not completed the exit loop yet
pathCollector := NewPathCollector(chore.db, exitingNodesLoopIncomplete, chore.log, chore.config.ChoreBatchSize)
err = chore.metainfoLoop.Join(ctx, pathCollector)
if err != nil {
chore.log.Error("error joining metainfo loop.", zap.Error(err))
return nil
}
err = pathCollector.Flush(ctx)
if err != nil {
chore.log.Error("error flushing collector buffer.", zap.Error(err))
return nil
}
now := time.Now().UTC()
for _, nodeID := range exitingNodesLoopIncomplete {
exitStatus := overlay.ExitStatusRequest{
NodeID: nodeID,
ExitLoopCompletedAt: now,
}
_, err = chore.overlay.UpdateExitStatus(ctx, &exitStatus)
if err != nil {
chore.log.Error("error updating exit status.", zap.Error(err))
}
bytesToTransfer := pathCollector.nodeIDStorage[nodeID]
mon.IntVal("graceful_exit_init_bytes_stored").Observe(bytesToTransfer)
}
return nil
})
}
// Close closes chore.
func (chore *Chore) Close() error {
chore.Loop.Close()
return nil
}