Skip to content

Commit

Permalink
Merge pull request #566 from redapple/offsite-stats
Browse files: browse the repository at this point in the history
OffsiteMiddleware: add 2 stats counters
  • Loading branch information
dangra committed Jan 31, 2014
2 parents 1e553ec + fd5b405 commit e1c6d3f
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 2 deletions.
7 changes: 6 additions & 1 deletion scrapy/contrib/spidermiddleware/offsite.py
Expand Up @@ -13,9 +13,12 @@

class OffsiteMiddleware(object):

# Keep a reference to the stats object handed in by the caller.
# In the visible code it is supplied as `crawler.stats` and is later used
# through `inc_value()` to bump the 'offsite/domains' and 'offsite/filtered'
# counters.
def __init__(self, stats):
self.stats = stats

@classmethod
def from_crawler(cls, crawler):
    """Build the middleware from ``crawler``.

    The new instance is given ``crawler.stats`` and its ``spider_opened``
    method is connected to the ``signals.spider_opened`` signal through
    ``crawler.signals``.

    Returns the newly created middleware instance.
    """
    # The constructor requires the stats object; a bare ``cls()`` call
    # would raise TypeError, so the instance is created exactly once here.
    o = cls(crawler.stats)
    crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
    return o

Expand All @@ -30,6 +33,8 @@ def process_spider_output(self, response, result, spider):
self.domains_seen.add(domain)
log.msg(format="Filtered offsite request to %(domain)r: %(request)s",
level=log.DEBUG, spider=spider, domain=domain, request=x)
self.stats.inc_value('offsite/domains', spider=spider)
self.stats.inc_value('offsite/filtered', spider=spider)
else:
yield x

Expand Down
4 changes: 3 additions & 1 deletion scrapy/tests/test_spidermiddleware_offsite.py
Expand Up @@ -3,13 +3,15 @@
from scrapy.http import Response, Request
from scrapy.spider import Spider
from scrapy.contrib.spidermiddleware.offsite import OffsiteMiddleware
from scrapy.utils.test import get_crawler


class TestOffsiteMiddleware(TestCase):

def setUp(self):
    """Create the spider and an OffsiteMiddleware bound to a test crawler."""
    self.spider = self._get_spider()
    # Build the middleware through from_crawler so it receives the
    # crawler's stats object; OffsiteMiddleware() without arguments would
    # raise TypeError because the constructor requires ``stats``.
    crawler = get_crawler()
    self.mw = OffsiteMiddleware.from_crawler(crawler)
    self.mw.spider_opened(self.spider)

def _get_spider(self):
Expand Down

0 comments on commit e1c6d3f

Please sign in to comment.