diff --git a/CHANGELOG.md b/CHANGELOG.md index d65ced9ef617..858b07217934 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,8 @@ changes. ### Fixed +- Protocol: handling `query_channel_range` for large numbers of blocks + (eg. 4 billion) was slow due to a bug. ### Security diff --git a/gossipd/gossipd.c b/gossipd/gossipd.c index d04300a5c910..5f09194de3cc 100644 --- a/gossipd/gossipd.c +++ b/gossipd/gossipd.c @@ -659,9 +659,14 @@ static void reply_channel_range(struct peer *peer, /*~ When we need to send an array of channels, it might go over our 64k packet * size. If it doesn't, we recurse, splitting in two, etc. Each message * indicates what blocks it contains, so the recipient knows when we're - * finished. */ + * finished. + * + * tail_blocks is the empty blocks at the end, in case they asked for all + * blocks to 4 billion. + */ static void queue_channel_ranges(struct peer *peer, - u32 first_blocknum, u32 number_of_blocks) + u32 first_blocknum, u32 number_of_blocks, + u32 tail_blocks) { struct routing_state *rstate = peer->daemon->rstate; u8 *encoded = encode_short_channel_ids_start(tmpctx); @@ -704,7 +709,8 @@ static void queue_channel_ranges(struct peer *peer, /* If we can encode that, fine: send it */ if (encode_short_channel_ids_end(&encoded, max_encoded_bytes)) { - reply_channel_range(peer, first_blocknum, number_of_blocks, + reply_channel_range(peer, first_blocknum, + number_of_blocks + tail_blocks, encoded); return; } @@ -717,22 +723,26 @@ static void queue_channel_ranges(struct peer *peer, first_blocknum); return; } - status_debug("queue_channel_ranges full: splitting %u+%u and %u+%u", + status_debug("queue_channel_ranges full: splitting %u+%u and %u+%u(+%u)", first_blocknum, number_of_blocks / 2, first_blocknum + number_of_blocks / 2, - number_of_blocks - number_of_blocks / 2); - queue_channel_ranges(peer, first_blocknum, number_of_blocks / 2); + number_of_blocks - number_of_blocks / 2, + tail_blocks); + queue_channel_ranges(peer, first_blocknum, number_of_blocks / 2, 0); queue_channel_ranges(peer, first_blocknum + number_of_blocks / 2, - number_of_blocks - number_of_blocks / 2); + number_of_blocks - number_of_blocks / 2, + tail_blocks); } /*~ The peer can ask for all channels is a series of blocks. We reply with one * or more messages containing the short_channel_ids. */ static u8 *handle_query_channel_range(struct peer *peer, const u8 *msg) { + struct routing_state *rstate = peer->daemon->rstate; struct bitcoin_blkid chain_hash; - u32 first_blocknum, number_of_blocks; + u32 first_blocknum, number_of_blocks, tail_blocks; + struct short_channel_id last_scid; if (!fromwire_query_channel_range(msg, &chain_hash, &first_blocknum, &number_of_blocks)) { @@ -751,14 +761,25 @@ static u8 *handle_query_channel_range(struct peer *peer, const u8 *msg) return NULL; } - /* This checks for 32-bit overflow! */ - if (first_blocknum + number_of_blocks < first_blocknum) { - return towire_errorfmt(peer, NULL, - "query_channel_range overflow %u+%u", - first_blocknum, number_of_blocks); - } - - queue_channel_ranges(peer, first_blocknum, number_of_blocks); + /* If they ask for number_of_blocks UINTMAX, and we have to divide + * and conquer, we'll do a lot of unnecessary work. Cap it at the + * last value we have, then send an empty reply. */ + if (uintmap_last(&rstate->chanmap, &last_scid.u64)) { + u32 last_block = short_channel_id_blocknum(&last_scid); + + /* u64 here avoids overflow on number_of_blocks + UINTMAX for example */ + if ((u64)first_blocknum + number_of_blocks > last_block) { + tail_blocks = first_blocknum + number_of_blocks + - last_block - 1; + number_of_blocks -= tail_blocks; + } else + tail_blocks = 0; + } else + tail_blocks = 0; + + queue_channel_ranges(peer, first_blocknum, number_of_blocks, + tail_blocks); return NULL; } @@ -2291,6 +2312,13 @@ static struct io_plan *query_channel_range(struct io_conn *conn, goto fail; } + /* Check for overflow on 32-bit machines! */ + if (BITMAP_NWORDS(number_of_blocks) < number_of_blocks / BITMAP_WORD_BITS) { + status_broken("query_channel_range: huge number_of_blocks (%u) not supported", + number_of_blocks); + goto fail; + } + status_debug("sending query_channel_range for blocks %u+%u", first_blocknum, number_of_blocks); msg = towire_query_channel_range(NULL, &daemon->chain_hash, diff --git a/tests/test_gossip.py b/tests/test_gossip.py index 71274db236ef..06a72f90a91c 100644 --- a/tests/test_gossip.py +++ b/tests/test_gossip.py @@ -4,7 +4,6 @@ import json import logging import os -import pytest import struct import subprocess import time @@ -548,7 +547,6 @@ def check_gossip(n): @unittest.skipIf(not DEVELOPER, "needs DEVELOPER=1") -@pytest.mark.xfail(strict=True) def test_gossip_query_channel_range(node_factory, bitcoind): l1, l2, l3, l4 = node_factory.line_graph(4, opts={'log-level': 'io'}, fundchannel=False) @@ -662,8 +660,8 @@ def test_gossip_query_channel_range(node_factory, bitcoind): first=0, num=1000000) - # Turns out it sends huge number of empty replies here. - l1.daemon.wait_for_logs([r'\[IN\] 0108'] * 21) + # Turns out it sends: 0+53, 53+26, 79+13, 92+7, 99+3, 102+2, 104+1, 105+999895 + l1.daemon.wait_for_logs([r'\[IN\] 0108'] * 8) # It should definitely have split assert ret['final_first_block'] != 0 or ret['final_num_blocks'] != 1000000 @@ -673,11 +671,11 @@ def test_gossip_query_channel_range(node_factory, bitcoind): assert ret['short_channel_ids'][1] == scid23 l2.daemon.wait_for_log('queue_channel_ranges full: splitting') - # Test overflow case doesn't split forever; should only get 32 for this. + # Test overflow case doesn't split forever; should still only get 8 for this ret = l1.rpc.dev_query_channel_range(id=l2.info['id'], first=1, num=429496000) - l1.daemon.wait_for_logs([r'\[IN\] 0108'] * 32) + l1.daemon.wait_for_logs([r'\[IN\] 0108'] * 8) # And no more! time.sleep(1)