Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion discovery/gossiper.go
Original file line number Diff line number Diff line change
Expand Up @@ -876,7 +876,14 @@ func (d *AuthenticatedGossiper) stop() {
func (d *AuthenticatedGossiper) ProcessRemoteAnnouncement(ctx context.Context,
msg lnwire.Message, peer lnpeer.Peer) chan error {

errChan := make(chan error, 1)
// Buffer up to two messages on errChan since up to two messages may be
// written and not all callers of this function actually read from
// errChan. Without this buffer goroutines end up blocking on writes to
// errChan, which prevents the gossiper from shutting down cleanly.
//
// TODO(ziggie): Redesign this once the actor model pattern becomes
// available. See https://github.com/lightningnetwork/lnd/pull/9820.
errChan := make(chan error, 2)

// For messages in the known set of channel series queries, we'll
// dispatch the message directly to the GossipSyncer, and skip the main
Expand Down
61 changes: 61 additions & 0 deletions discovery/gossiper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5235,3 +5235,64 @@ func TestRecoverGossipPanicNilJobID(t *testing.T) {
t.Fatal("timeout waiting for error")
}
}

// TestGossiperShutdownWrongChainAnnouncement tests that the gossiper can shut
// down cleanly after processing a channel announcement with the wrong chain
// hash. This is a regression test for a bug where the gossiper would deadlock
// on shutdown because more errors were sent on the error channel than it would
// buffer, and no one was reading those error messages.
//
// In this test we trigger the sending of two error messages:
// 1. First send when rejecting the wrong-chain announcement
// 2. Second send when SignalDependents returns an error
//
// Since the error channel had a buffer of 1, the second send would block
// forever, preventing the goroutine from completing and causing Stop() to hang
// on wg.Wait().
func TestGossiperShutdownWrongChainAnnouncement(t *testing.T) {
t.Parallel()

// Create a test context with the gossiper configured for MainNet.
tCtx, err := createTestCtx(t, 0, false)
require.NoError(t, err)

// Create a channel announcement with:
// 1. Wrong chain hash (SimNet instead of MainNet)
// 2. NodeID1 == NodeID2
//
// The first condition triggers the first error message to be sent, and
// the second condition causes SignalDependents to attempt to remove the
// same dependent job twice, which then triggers the second error
// message to be sent.
wrongChainAnn := &lnwire.ChannelAnnouncement1{
ChainHash: *chaincfg.SimNetParams.GenesisHash,
ShortChannelID: lnwire.ShortChannelID{
BlockHeight: 1,
TxIndex: 0,
TxPosition: 0,
},
Features: testFeatures,
}
// Use the SAME public key for NodeID1 and NodeID2 to trigger the
// second error message.
copy(wrongChainAnn.NodeID1[:], remoteKeyPub1.SerializeCompressed())
copy(wrongChainAnn.NodeID2[:], remoteKeyPub1.SerializeCompressed())
copy(wrongChainAnn.BitcoinKey1[:], bitcoinKeyPub1.SerializeCompressed())
copy(wrongChainAnn.BitcoinKey2[:], bitcoinKeyPub2.SerializeCompressed())

nodePeer := &mockPeer{remoteKeyPub1, nil, nil, atomic.Bool{}}

// Process the announcement without reading from the error channel,
// exactly as Brontide does.
_ = tCtx.gossiper.ProcessRemoteAnnouncement(
t.Context(), wrongChainAnn, nodePeer,
)

// Give the gossiper time to process the announcement.
time.Sleep(100 * time.Millisecond)

// Now stop the gossiper. This should complete without hanging.
// If the bug is present, Stop() will hang forever because a goroutine
// is blocked trying to send to the error channel a second time.
require.NoError(t, tCtx.gossiper.Stop())
}
7 changes: 7 additions & 0 deletions docs/release-notes/release-notes-0.20.1.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,12 @@
The fix adds automatic format detection to handle both legacy (raw feature
bits) and new (length-prefixed) formats.

* [Fixed a shutdown
deadlock](https://github.com/lightningnetwork/lnd/pull/10540) in the gossiper.
Certain gossip messages could cause multiple error messages to be sent on a
channel that was only expected to be used for a single message. The erring
goroutine would block on the second send, leading to a deadlock at shutdown.

# New Features

## Functional Enhancements
Expand Down Expand Up @@ -152,4 +158,5 @@

* Abdulkbk
* bitromortac
* Matt Morehouse
* Ziggie
Loading