SEP-017 contracts: add tests and minor improvements

commit 11d29c70059ee82e5608aaf0c958be19ece6e97a (1 parent: bf8dc61)
authored by @alexcepoi
docs/index.rst (4 changed lines)
@@ -129,7 +129,7 @@ Solving specific problems
faq
topics/debug
- topics/testing
+ topics/contracts
topics/firefox
topics/firebug
topics/leaks
@@ -144,7 +144,7 @@ Solving specific problems
:doc:`topics/debug`
Learn how to debug common problems of your scrapy spider.
-:doc:`topics/testing`
+:doc:`topics/contracts`
Learn how to use contracts for testing your spiders.
:doc:`topics/firefox`
docs/topics/testing.rst → docs/topics/contracts.rst (8 changed lines)
@@ -1,8 +1,8 @@
-.. _topics-testing:
+.. _topics-contracts:
-===============
-Testing Spiders
-===============
+=================
+Spiders Contracts
+=================
Testing spiders can get particularly annoying and while nothing prevents you
from writing unit tests the task gets cumbersome quickly. Scrapy offers an
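
The renamed page documents the docstring-based contract syntax that the new tests below also exercise: each tag in a callback's docstring becomes a contract to check. A minimal sketch of an annotated callback, assuming the three default contracts shipped in scrapy.contracts.default are enabled; SampleItem, SampleSpider and the scraped fields are illustrative only, not part of this commit:

    from scrapy.spider import BaseSpider
    from scrapy.item import Item, Field

    class SampleItem(Item):
        name = Field()
        url = Field()

    class SampleSpider(BaseSpider):
        name = 'sample'

        def parse_item(self, response):
            """ extract exactly one item exposing a name and a url
            @url http://scrapy.org
            @returns items 1 1
            @scrapes name url
            """
            return SampleItem(name='sample', url=response.url)

With such a spider in a project, the check command (scrapy/commands/check.py below) would build a request from the @url tag and verify the remaining tags against the callback's output.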
scrapy/commands/check.py (15 changed lines)
@@ -1,24 +1,22 @@
from collections import defaultdict
from functools import wraps
-from scrapy.conf import settings
from scrapy.command import ScrapyCommand
-from scrapy.http import Request
from scrapy.contracts import ContractsManager
-from scrapy.utils import display
from scrapy.utils.misc import load_object
from scrapy.utils.spider import iterate_spider_output
from scrapy.utils.conf import build_component_list
+
def _generate(cb):
""" create a callback which does not return anything """
@wraps(cb)
def wrapper(response):
output = cb(response)
output = list(iterate_spider_output(output))
- # display.pprint(output)
return wrapper
+
class Command(ScrapyCommand):
requires_project = True
default_settings = {'LOG_ENABLED': False}
@@ -31,14 +29,15 @@ def short_desc(self):
def add_options(self, parser):
ScrapyCommand.add_options(self, parser)
- parser.add_option("-l", "--list", dest="list", action="store_true", \
+ parser.add_option("-l", "--list", dest="list", action="store_true",
help="only list contracts, without checking them")
-
def run(self, args, opts):
# load contracts
- contracts = build_component_list(settings['SPIDER_CONTRACTS_BASE'],
- settings['SPIDER_CONTRACTS'])
+ contracts = build_component_list(
+ self.settings['SPIDER_CONTRACTS_BASE'],
+ self.settings['SPIDER_CONTRACTS'],
+ )
self.conman = ContractsManager([load_object(c) for c in contracts])
# contract requests
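
run() now reads the contract list from the command's own self.settings instead of the global settings singleton, merging SPIDER_CONTRACTS_BASE with the project-level SPIDER_CONTRACTS before handing the resulting classes to ContractsManager. A sketch of how a project could register an extra contract, assuming the usual path-to-order dict format consumed by build_component_list; the HeadersContract path is a hypothetical example, not part of this commit:

    # myproject/settings.py -- hypothetical project settings
    SPIDER_CONTRACTS = {
        'myproject.contracts.HeadersContract': 500,
    }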
scrapy/contracts/__init__.py (33 changed lines)
@@ -1,12 +1,12 @@
import re
-import inspect
from functools import wraps
from scrapy.http import Request
from scrapy.utils.spider import iterate_spider_output
-from scrapy.utils.misc import get_spec
+from scrapy.utils.python import get_spec
from scrapy.exceptions import ContractFail
+
class ContractsManager(object):
contracts = {}
@@ -27,7 +27,7 @@ def extract_contracts(self, method):
return contracts
- def from_method(self, method):
+ def from_method(self, method, fail=False):
contracts = self.extract_contracts(method)
if contracts:
# calculate request args
@@ -43,12 +43,13 @@ def from_method(self, method):
# execute pre and post hooks in order
for contract in reversed(contracts):
- request = contract.add_pre_hook(request)
+ request = contract.add_pre_hook(request, fail)
for contract in contracts:
- request = contract.add_post_hook(request)
+ request = contract.add_post_hook(request, fail)
return request
+
class Contract(object):
""" Abstract class for contracts """
@@ -56,26 +57,36 @@ def __init__(self, method, *args):
self.method = method
self.args = args
- def add_pre_hook(self, request):
+ def add_pre_hook(self, request, fail=False):
cb = request.callback
+
@wraps(cb)
def wrapper(response):
- try: self.pre_process(response)
+ try:
+ self.pre_process(response)
except ContractFail as e:
- print e.format(self.method)
+ if fail:
+ raise
+ else:
+ print e.format(self.method)
return list(iterate_spider_output(cb(response)))
request.callback = wrapper
return request
- def add_post_hook(self, request):
+ def add_post_hook(self, request, fail=False):
cb = request.callback
+
@wraps(cb)
def wrapper(response):
output = list(iterate_spider_output(cb(response)))
- try: self.post_process(output)
+ try:
+ self.post_process(output)
except ContractFail as e:
- print e.format(self.method)
+ if fail:
+ raise
+ else:
+ print e.format(self.method)
return output
request.callback = wrapper
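
Since add_pre_hook() and add_post_hook() only call self.pre_process(response) and self.post_process(output) and catch ContractFail (printing it, or re-raising when fail=True), a third-party contract just subclasses Contract and raises ContractFail on violation. A minimal sketch, assuming (as the default contracts appear to) that each contract declares a name matching its docstring tag and receives the tag's arguments in self.args; HasHeaderContract and the @has_header tag are illustrative only:

    from scrapy.contracts import Contract
    from scrapy.exceptions import ContractFail

    class HasHeaderContract(Contract):
        """ check that the response carries the given headers
        @has_header Content-Type
        """
        name = 'has_header'

        def pre_process(self, response):
            # self.args holds the arguments written after the tag
            for header in self.args:
                if header not in response.headers:
                    raise ContractFail('missing header: %s' % header)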
scrapy/tests/test_contracts.py (new file, 124 lines)
@@ -0,0 +1,124 @@
+from twisted.trial import unittest
+
+from scrapy.spider import BaseSpider
+from scrapy.http import Request
+from scrapy.item import Item, Field
+from scrapy.exceptions import ContractFail
+from scrapy.contracts import ContractsManager
+from scrapy.contracts.default import (
+ UrlContract,
+ ReturnsContract,
+ ScrapesContract,
+)
+
+
+class TestItem(Item):
+ name = Field()
+ url = Field()
+
+
+class ResponseMock(object):
+ url = 'http://scrapy.org'
+
+
+class TestSpider(BaseSpider):
+ name = 'demo_spider'
+
+ def returns_request(self, response):
+ """ method which returns request
+ @url http://scrapy.org
+ @returns requests 1
+ """
+ return Request('http://scrapy.org', callback=self.returns_item)
+
+ def returns_item(self, response):
+ """ method which returns item
+ @url http://scrapy.org
+ @returns items 1 1
+ """
+ return TestItem(url=response.url)
+
+ def returns_fail(self, response):
+ """ method which returns item
+ @url http://scrapy.org
+ @returns items 0 0
+ """
+ return TestItem(url=response.url)
+
+ def scrapes_item_ok(self, response):
+ """ returns item with name and url
+ @url http://scrapy.org
+ @returns items 1 1
+ @scrapes name url
+ """
+ return TestItem(name='test', url=response.url)
+
+ def scrapes_item_fail(self, response):
+ """ returns item with no name
+ @url http://scrapy.org
+ @returns items 1 1
+ @scrapes name url
+ """
+ return TestItem(url=response.url)
+
+ def parse_no_url(self, response):
+ """ method with no url
+ @returns items 1 1
+ """
+ pass
+
+
+class ContractsManagerTest(unittest.TestCase):
+ contracts = [UrlContract, ReturnsContract, ScrapesContract]
+
+ def test_contracts(self):
+ conman = ContractsManager(self.contracts)
+
+ # extract contracts correctly
+ contracts = conman.extract_contracts(TestSpider.returns_request)
+ self.assertEqual(len(contracts), 2)
+ self.assertEqual(frozenset(map(type, contracts)),
+ frozenset([UrlContract, ReturnsContract]))
+
+ # returns request for valid method
+ request = conman.from_method(TestSpider.returns_request)
+ self.assertIsNotNone(request)
+
+ # no request for missing url
+ request = conman.from_method(TestSpider.parse_no_url)
+ self.assertIsNone(request)
+
+ def test_returns(self):
+ conman = ContractsManager(self.contracts)
+
+ spider = TestSpider()
+ response = ResponseMock()
+
+ # returns_item
+ request = conman.from_method(spider.returns_item, fail=True)
+ output = request.callback(response)
+ self.assertEqual(map(type, output), [TestItem])
+
+ # returns_request
+ request = conman.from_method(spider.returns_request, fail=True)
+ output = request.callback(response)
+ self.assertEqual(map(type, output), [Request])
+
+ # returns_fail
+ request = conman.from_method(spider.returns_fail, fail=True)
+ self.assertRaises(ContractFail, request.callback, response)
+
+ def test_scrapes(self):
+ conman = ContractsManager(self.contracts)
+
+ spider = TestSpider()
+ response = ResponseMock()
+
+ # scrapes_item_ok
+ request = conman.from_method(spider.scrapes_item_ok, fail=True)
+ output = request.callback(response)
+ self.assertEqual(map(type, output), [TestItem])
+
+ # scrapes_item_fail
+ request = conman.from_method(spider.scrapes_item_fail, fail=True)
+ self.assertRaises(ContractFail, request.callback, response)
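
The suite is written for twisted.trial, so presumably it can also be run on its own with something like `trial scrapy.tests.test_contracts` from a checkout.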
scrapy/utils/misc.py (15 changed lines)
@@ -104,18 +104,3 @@ def md5sum(file):
break
m.update(d)
return m.hexdigest()
-
-def get_spec(func):
- """Returns (args, kwargs) tuple for a function
-
- >>> import re
- >>> get_spec(re.match)
- (['pattern', 'string'], {'flags': 0})
- """
- spec = inspect.getargspec(func)
- defaults = spec.defaults or []
-
- firstdefault = len(spec.args) - len(defaults)
- args = spec.args[:firstdefault]
- kwargs = dict(zip(spec.args[firstdefault:], defaults))
- return args, kwargs
scrapy/utils/python.py (36 changed lines)
@@ -158,6 +158,42 @@ def get_func_args(func):
raise TypeError('%s is not callable' % type(func))
return func_args
+def get_spec(func):
+ """Returns (args, kwargs) tuple for a function
+ >>> import re
+ >>> get_spec(re.match)
+ (['pattern', 'string'], {'flags': 0})
+
+ >>> class Test(object):
+ ... def __call__(self, val):
+ ... pass
+ ... def method(self, val, flags=0):
+ ... pass
+
+ >>> get_spec(Test)
+ (['self', 'val'], {})
+
+ >>> get_spec(Test.method)
+ (['self', 'val'], {'flags': 0})
+
+ >>> get_spec(Test().method)
+ (['self', 'val'], {'flags': 0})
+ """
+
+ if inspect.isfunction(func) or inspect.ismethod(func):
+ spec = inspect.getargspec(func)
+ elif hasattr(func, '__call__'):
+ spec = inspect.getargspec(func.__call__)
+ else:
+ raise TypeError('%s is not callable' % type(func))
+
+ defaults = spec.defaults or []
+
+ firstdefault = len(spec.args) - len(defaults)
+ args = spec.args[:firstdefault]
+ kwargs = dict(zip(spec.args[firstdefault:], defaults))
+ return args, kwargs
+
def equal_attributes(obj1, obj2, attributes):
"""Compare two objects attributes"""
# not attributes given return False by default