Skip to content

Commit

Permalink
fix(blocksync): use timer instead of time.After (cometbft#2584)
Browse files Browse the repository at this point in the history
`time.After` gets reset after each iteration of the `for` loop, which is
not what we want. We want the timer to fire after 30 sec if both peers
(first and second) don't respond.
  • Loading branch information
melekes authored and czarcas7ic committed Mar 12, 2024
1 parent 77fb4c9 commit b53e6fa
Showing 1 changed file with 19 additions and 4 deletions.
23 changes: 19 additions & 4 deletions blocksync/pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -720,11 +720,11 @@ PICK_PEER_LOOP:

// Picks a second peer and sends a request to it. If the second peer is already
// set, does nothing.
func (bpr *bpRequester) pickSecondPeerAndSendRequest() {
func (bpr *bpRequester) pickSecondPeerAndSendRequest() (picked bool) {
bpr.mtx.Lock()
if bpr.secondPeerID != "" {
bpr.mtx.Unlock()
return
return false
}
peerID := bpr.peerID
bpr.mtx.Unlock()
Expand All @@ -736,7 +736,10 @@ func (bpr *bpRequester) pickSecondPeerAndSendRequest() {
bpr.mtx.Unlock()

bpr.pool.sendRequest(bpr.height, secondPeer.id)
return true
}

return false
}

// Informs the requester of a new pool's height.
Expand All @@ -761,6 +764,9 @@ OUTER_LOOP:
bpr.pickSecondPeerAndSendRequest()
}

retryTimer := time.NewTimer(requestRetrySeconds * time.Second)
defer retryTimer.Stop()

for {
select {
case <-bpr.pool.Quit():
Expand All @@ -770,7 +776,7 @@ OUTER_LOOP:
return
case <-bpr.Quit():
return
case <-time.After(requestRetrySeconds * time.Second):
case <-retryTimer.C:
if !gotBlock {
bpr.Logger.Debug("Retrying block request(s) after timeout", "height", bpr.height, "peer", bpr.peerID, "secondPeerID", bpr.secondPeerID)
bpr.reset(bpr.peerID)
Expand All @@ -787,12 +793,21 @@ OUTER_LOOP:
// If both peers returned NoBlockResponse or bad block, reschedule both
// requests. If not, wait for the other peer.
if len(bpr.requestedFrom()) == 0 {
retryTimer.Stop()
continue OUTER_LOOP
}
case newHeight := <-bpr.newHeightCh:
if !gotBlock && bpr.height-newHeight < minBlocksForSingleRequest {
// The operation is a noop if the second peer is already set. The cost is checking a mutex.
bpr.pickSecondPeerAndSendRequest()
//
// If the second peer was just set, reset the retryTimer to give the
// second peer a chance to respond.
if picked := bpr.pickSecondPeerAndSendRequest(); picked {
if !retryTimer.Stop() {
<-retryTimer.C
}
retryTimer.Reset(requestRetrySeconds * time.Second)
}
}
case <-bpr.gotBlockCh:
gotBlock = true
Expand Down

0 comments on commit b53e6fa

Please sign in to comment.