Skip to content

Commit

Permalink
Remove concurrency limitation when using download delays and still ensure inter-request delays are enforced
Browse files Browse the repository at this point in the history
  • Loading branch information
dangra committed Dec 7, 2012
1 parent 8232569 commit 487b9b5
Showing 1 changed file with 12 additions and 8 deletions.
20 changes: 12 additions & 8 deletions scrapy/core/downloader/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def __init__(self, concurrency, delay, settings):
self.queue = deque()
self.transferring = set()
self.lastseen = 0
self.latercall = None

def free_transfer_slots(self):
    """Return how many more transfers this slot can start right now.

    The value is the configured ``concurrency`` minus the number of
    entries currently held in ``transferring``; it is not positive when
    the slot has no spare capacity.
    """
    return self.concurrency - len(self.transferring)
Expand Down Expand Up @@ -57,9 +58,6 @@ def _get_concurrency_delay(concurrency, spider, settings):
if hasattr(spider, 'max_concurrent_requests'):
concurrency = spider.max_concurrent_requests

if delay > 0:
concurrency = 1 # force concurrency=1 if download delay required

return concurrency, delay


Expand Down Expand Up @@ -87,6 +85,7 @@ def _deactivate(response):
slot.active.remove(request)
if not slot.active: # remove empty slots
self.inactive_slots[key] = self.slots.pop(key)

return response

dlfunc = partial(self._enqueue_request, slot=slot)
Expand Down Expand Up @@ -124,23 +123,28 @@ def _downloaded(response):
return deferred

# NOTE(review): this listing comes from a rendered diff. Indentation is
# stripped and there are no +/- markers, so lines removed by the commit and
# lines added by it appear interleaved. Confirm which lines survive against
# the repository before relying on this text.
#
# Purpose: start downloads for the requests queued on `slot`, postponing the
# work while the slot's download delay has not yet elapsed.
def _process_queue(self, spider, slot):
# A delayed re-run is already scheduled for this slot and has not fired yet;
# leave the queue alone until it does.
if slot.latercall and slot.latercall.active():
return

# Delay queue processing if a download_delay is configured
now = time()
delay = slot.download_delay()
if delay:
# Time still to wait: the delay minus the time elapsed since slot.lastseen.
penalty = delay - now + slot.lastseen
# NOTE(review): the next four lines look like the pre-commit variant (re-run
# scheduled through a throwaway Deferred, only when a transfer slot is free)
# - presumably removed by this commit; confirm.
if penalty > 0 and slot.free_transfer_slots():
d = defer.Deferred()
d.addCallback(self._process_queue, slot)
reactor.callLater(penalty, d.callback, spider)
# NOTE(review): the next two lines look like the post-commit variant: the
# delayed call is stored on the slot so the guard at the top can see it
# - confirm.
if penalty > 0:
slot.latercall = reactor.callLater(penalty, self._process_queue, spider, slot)
return
# NOTE(review): presumably removed by this commit, since lastseen is also set
# per request inside the loop below - confirm.
slot.lastseen = now

# Process enqueued requests if there are free slots to transfer for this slot
while slot.queue and slot.free_transfer_slots() > 0:
# Record when this request is started; the penalty above is measured from it.
slot.lastseen = now
# Each queue entry pairs a request with the Deferred queued alongside it.
request, deferred = slot.queue.popleft()
dfd = self._download(slot, request, spider)
# Forward the download's outcome to the Deferred queued with the request.
dfd.chainDeferred(deferred)
# prevent burst if inter-request delays were configured
if delay:
# Re-enter once and stop looping: with lastseen just set, the penalty check
# above presumably schedules the next request after the delay rather than
# sending the rest of the queue immediately - confirm.
self._process_queue(spider, slot)
break

def _download(self, slot, request, spider):
# The order is very important for the following deferreds. Do not change!
Expand Down

0 comments on commit 487b9b5

Please sign in to comment.