Skip to content

Commit

Permalink
Avoid alignment of reapNetwork and tableEntries
Browse files Browse the repository at this point in the history
Make sure that the network is garbage collected after
the entries. Entries to be deleted requires that the network
is present.

Signed-off-by: Flavio Crisciani <flavio.crisciani@docker.com>
  • Loading branch information
Flavio Crisciani committed Sep 22, 2017
1 parent 3feb3aa commit fbba555
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 13 deletions.
21 changes: 13 additions & 8 deletions networkdb/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,15 @@ import (
)

const (
reapInterval = 30 * time.Minute
reapPeriod = 5 * time.Second
retryInterval = 1 * time.Second
nodeReapInterval = 24 * time.Hour
nodeReapPeriod = 2 * time.Hour
// The garbage collection logic for entries leverage the presence of the network.
// For this reason the expiration time of the network is put slightly higher than the entry expiration so that
// there is at least 5 extra cycle to make sure that all the entries are properly deleted before deleting the network.
reapEntryInterval = 30 * time.Minute
reapNetworkInterval = reapEntryInterval + 5*reapPeriod
reapPeriod = 5 * time.Second
retryInterval = 1 * time.Second
nodeReapInterval = 24 * time.Hour
nodeReapPeriod = 2 * time.Hour
)

type logWriter struct{}
Expand Down Expand Up @@ -300,8 +304,9 @@ func (nDB *NetworkDB) reconnectNode() {
// the reaper runs. NOTE nDB.reapTableEntries updates the reapTime with a readlock. This
// is safe as long as no other concurrent path touches the reapTime field.
func (nDB *NetworkDB) reapState() {
nDB.reapNetworks()
// The reapTableEntries leverage the presence of the network so garbage collect entries first
nDB.reapTableEntries()
nDB.reapNetworks()
}

func (nDB *NetworkDB) reapNetworks() {
Expand Down Expand Up @@ -414,8 +419,8 @@ func (nDB *NetworkDB) gossip() {
// Collect stats and print the queue info, note this code is here also to have a view of the queues empty
network.qMessagesSent += len(msgs)
if printStats {
logrus.Infof("NetworkDB stats - net:%s Entries:%d Queue qLen:%d netPeers:%d netMsg/s:%d",
nid, network.entriesNumber, broadcastQ.NumQueued(), broadcastQ.NumNodes(),
logrus.Infof("NetworkDB stats - netID:%s leaving:%t netPeers:%d entries:%d Queue qLen:%d netMsg/s:%d",
nid, network.leaving, broadcastQ.NumNodes(), network.entriesNumber, broadcastQ.NumQueued(),
network.qMessagesSent/int((nDB.config.StatsPrintPeriod/time.Second)))
network.qMessagesSent = 0
}
Expand Down
4 changes: 2 additions & 2 deletions networkdb/delegate.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ func (nDB *NetworkDB) handleNetworkEvent(nEvent *NetworkEvent) bool {
n.ltime = nEvent.LTime
n.leaving = nEvent.Type == NetworkEventTypeLeave
if n.leaving {
n.reapTime = reapInterval
n.reapTime = reapNetworkInterval

// The remote node is leaving the network, but not the gossip cluster.
// Mark all its entries in deleted state, this will guarantee that
Expand Down Expand Up @@ -242,7 +242,7 @@ func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent) bool {
// field. If that is not the case, this can be a BUG
if e.deleting && e.reapTime == 0 {
logrus.Warnf("handleTableEvent object %+v has a 0 reapTime, is the cluster running the same docker engine version?", tEvent)
e.reapTime = reapInterval
e.reapTime = reapEntryInterval
}

nDB.Lock()
Expand Down
7 changes: 4 additions & 3 deletions networkdb/networkdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ func (nDB *NetworkDB) DeleteEntry(tname, nid, key string) error {
node: nDB.config.NodeName,
value: value,
deleting: true,
reapTime: reapInterval,
reapTime: reapEntryInterval,
}

if err := nDB.sendTableEvent(TableEventTypeDelete, nid, tname, key, entry); err != nil {
Expand Down Expand Up @@ -478,7 +478,7 @@ func (nDB *NetworkDB) deleteNodeNetworkEntries(nid, node string) {
node: oldEntry.node,
value: oldEntry.value,
deleting: true,
reapTime: reapInterval,
reapTime: reapEntryInterval,
}

// we arrived at this point in 2 cases:
Expand Down Expand Up @@ -619,8 +619,9 @@ func (nDB *NetworkDB) LeaveNetwork(nid string) error {
return fmt.Errorf("could not find network %s while trying to leave", nid)
}

logrus.Debugf("%s: leaving network %s", nDB.config.NodeName, nid)
n.ltime = ltime
n.reapTime = reapInterval
n.reapTime = reapNetworkInterval
n.leaving = true
return nil
}
Expand Down

0 comments on commit fbba555

Please sign in to comment.