Skip to content

Commit

Permalink
Merge pull request #169 from qshine/pipeline
Browse files Browse the repository at this point in the history
use a pipeline when reading from the Redis list queue
  • Loading branch information
rmax authored Jun 3, 2020
2 parents 9671d50 + 9a383ab commit 02ae1ea
Showing 1 changed file with 11 additions and 7 deletions.
18 changes: 11 additions & 7 deletions src/scrapy_redis/spiders.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,18 +72,22 @@ def setup_redis(self, crawler=None):
# that's when we will schedule new requests from redis queue
crawler.signals.connect(self.spider_idle, signal=signals.spider_idle)

def lpop_multi(self, redis_key, batch_size):
    """Pop up to ``batch_size`` items from the head of a Redis list.

    LRANGE (read the first ``batch_size`` elements) and LTRIM (drop
    them from the list) are queued on a single pipeline and executed
    together; with redis-py's default transactional pipeline the pair
    runs inside MULTI/EXEC, so no other client can interleave between
    the read and the trim.

    :param redis_key: name of the Redis list to pop from.
    :param batch_size: maximum number of items to return.
    :returns: list of popped items (may be shorter than ``batch_size``,
        or empty when the list is empty).
    """
    with self.server.pipeline() as pipe:
        pipe.lrange(redis_key, 0, batch_size - 1)
        pipe.ltrim(redis_key, batch_size, -1)
        items, _ = pipe.execute()
    return items

def next_requests(self):
"""Returns a request to be scheduled or none."""
use_set = self.settings.getbool('REDIS_START_URLS_AS_SET', defaults.START_URLS_AS_SET)
fetch_one = self.server.spop if use_set else self.server.lpop
fetch_data = self.server.spop if use_set else self.lpop_multi
# XXX: Do we need to use a timeout here?
found = 0
# TODO: Use redis pipeline execution.
while found < self.redis_batch_size:
data = fetch_one(self.redis_key)
if not data:
# Queue empty.
break

datas = fetch_data(self.redis_key, self.redis_batch_size)
for data in datas:
req = self.make_request_from_data(data)
if req:
yield req
Expand Down

0 comments on commit 02ae1ea

Please sign in to comment.