diff --git a/CHANGELOG.md b/CHANGELOG.md
index d65ced9ef617..858b07217934 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,6 +25,8 @@ changes.
 
 ### Fixed
 
+- Protocol: handling `query_channel_range` for large numbers of blocks
+  (eg. 4 billion) was slow due to a bug.
 
 ### Security
 
diff --git a/gossipd/gossipd.c b/gossipd/gossipd.c
index d04300a5c910..5f09194de3cc 100644
--- a/gossipd/gossipd.c
+++ b/gossipd/gossipd.c
@@ -659,9 +659,14 @@ static void reply_channel_range(struct peer *peer,
 /*~ When we need to send an array of channels, it might go over our 64k packet
  * size.  If it doesn't, we recurse, splitting in two, etc.  Each message
  * indicates what blocks it contains, so the recipient knows when we're
- * finished. */
+ * finished.
+ *
+ * tail_blocks is the empty blocks at the end, in case they asked for all
+ * blocks to 4 billion.
+ */
 static void queue_channel_ranges(struct peer *peer,
-				 u32 first_blocknum, u32 number_of_blocks)
+				 u32 first_blocknum, u32 number_of_blocks,
+				 u32 tail_blocks)
 {
 	struct routing_state *rstate = peer->daemon->rstate;
 	u8 *encoded = encode_short_channel_ids_start(tmpctx);
@@ -704,7 +709,8 @@ static void queue_channel_ranges(struct peer *peer,
 
 	/* If we can encode that, fine: send it */
 	if (encode_short_channel_ids_end(&encoded, max_encoded_bytes)) {
-		reply_channel_range(peer, first_blocknum, number_of_blocks,
+		reply_channel_range(peer, first_blocknum,
+				    number_of_blocks + tail_blocks,
 				    encoded);
 		return;
 	}
@@ -717,22 +723,26 @@ static void queue_channel_ranges(struct peer *peer,
 			      first_blocknum);
 		return;
 	}
-	status_debug("queue_channel_ranges full: splitting %u+%u and %u+%u",
+	status_debug("queue_channel_ranges full: splitting %u+%u and %u+%u(+%u)",
 		     first_blocknum,
 		     number_of_blocks / 2,
 		     first_blocknum + number_of_blocks / 2,
-		     number_of_blocks - number_of_blocks / 2);
-	queue_channel_ranges(peer, first_blocknum, number_of_blocks / 2);
+		     number_of_blocks - number_of_blocks / 2,
+		     tail_blocks);
+	queue_channel_ranges(peer, first_blocknum, number_of_blocks / 2, 0);
 	queue_channel_ranges(peer, first_blocknum + number_of_blocks / 2,
-			     number_of_blocks - number_of_blocks / 2);
+			     number_of_blocks - number_of_blocks / 2,
+			     tail_blocks);
 }
 
 /*~ The peer can ask for all channels is a series of blocks.  We reply with one
  * or more messages containing the short_channel_ids. */
 static u8 *handle_query_channel_range(struct peer *peer, const u8 *msg)
 {
+	struct routing_state *rstate = peer->daemon->rstate;
 	struct bitcoin_blkid chain_hash;
-	u32 first_blocknum, number_of_blocks;
+	u32 first_blocknum, number_of_blocks, tail_blocks;
+	struct short_channel_id last_scid;
 
 	if (!fromwire_query_channel_range(msg, &chain_hash,
 					  &first_blocknum, &number_of_blocks)) {
@@ -751,14 +761,25 @@ static u8 *handle_query_channel_range(struct peer *peer, const u8 *msg)
 		return NULL;
 	}
 
-	/* This checks for 32-bit overflow! */
-	if (first_blocknum + number_of_blocks < first_blocknum) {
-		return towire_errorfmt(peer, NULL,
-				       "query_channel_range overflow %u+%u",
-				       first_blocknum, number_of_blocks);
-	}
-
-	queue_channel_ranges(peer, first_blocknum, number_of_blocks);
+	/* If they ask for number_of_blocks UINTMAX, and we have to divide
+	 * and conquer, we'll do a lot of unnecessary work.  Cap it at the
+	 * last value we have, then send an empty reply. */
+	if (uintmap_last(&rstate->chanmap, &last_scid.u64)) {
+		u32 last_block = short_channel_id_blocknum(&last_scid);
+
+		/* u64 here avoids overflow on number_of_blocks
+		   UINTMAX for example */
+		if ((u64)first_blocknum + number_of_blocks > last_block) {
+			tail_blocks = first_blocknum + number_of_blocks
+				- last_block - 1;
+			number_of_blocks -= tail_blocks;
+		} else
+			tail_blocks = 0;
+	} else
+		tail_blocks = 0;
+
+	queue_channel_ranges(peer, first_blocknum, number_of_blocks,
+			     tail_blocks);
 	return NULL;
 }
 
@@ -2291,6 +2312,13 @@ static struct io_plan *query_channel_range(struct io_conn *conn,
 		goto fail;
 	}
 
+	/* Check for overflow on 32-bit machines! */
+	if (BITMAP_NWORDS(number_of_blocks) < number_of_blocks / BITMAP_WORD_BITS) {
+		status_broken("query_channel_range: huge number_of_blocks (%u) not supported",
+			number_of_blocks);
+		goto fail;
+	}
+
 	status_debug("sending query_channel_range for blocks %u+%u",
 		     first_blocknum, number_of_blocks);
 	msg = towire_query_channel_range(NULL, &daemon->chain_hash,
diff --git a/tests/test_gossip.py b/tests/test_gossip.py
index 71274db236ef..06a72f90a91c 100644
--- a/tests/test_gossip.py
+++ b/tests/test_gossip.py
@@ -4,7 +4,6 @@
 import json
 import logging
 import os
-import pytest
 import struct
 import subprocess
 import time
@@ -548,7 +547,6 @@ def check_gossip(n):
 
 
 @unittest.skipIf(not DEVELOPER, "needs DEVELOPER=1")
-@pytest.mark.xfail(strict=True)
 def test_gossip_query_channel_range(node_factory, bitcoind):
     l1, l2, l3, l4 = node_factory.line_graph(4, opts={'log-level': 'io'},
                                              fundchannel=False)
@@ -662,8 +660,8 @@ def test_gossip_query_channel_range(node_factory, bitcoind):
                                          first=0,
                                          num=1000000)
 
-    # Turns out it sends huge number of empty replies here.
-    l1.daemon.wait_for_logs([r'\[IN\] 0108'] * 21)
+    # Turns out it sends: 0+53, 53+26, 79+13, 92+7, 99+3, 102+2, 104+1, 105+999895
+    l1.daemon.wait_for_logs([r'\[IN\] 0108'] * 8)
 
     # It should definitely have split
     assert ret['final_first_block'] != 0 or ret['final_num_blocks'] != 1000000
@@ -673,11 +671,11 @@ def test_gossip_query_channel_range(node_factory, bitcoind):
     assert ret['short_channel_ids'][1] == scid23
     l2.daemon.wait_for_log('queue_channel_ranges full: splitting')
 
-    # Test overflow case doesn't split forever; should only get 32 for this.
+    # Test overflow case doesn't split forever; should still only get 8 for this
     ret = l1.rpc.dev_query_channel_range(id=l2.info['id'],
                                          first=1,
                                          num=429496000)
-    l1.daemon.wait_for_logs([r'\[IN\] 0108'] * 32)
+    l1.daemon.wait_for_logs([r'\[IN\] 0108'] * 8)
 
     # And no more!
     time.sleep(1)