from collections import defaultdict
from functools import wraps

from scrapy.command import ScrapyCommand
from scrapy.contracts import ContractsManager
from scrapy.utils.misc import load_object
from scrapy.utils.spider import iterate_spider_output
from scrapy.utils.conf import build_component_list


def _generate(cb):
    """ create a callback which consumes and discards the spider output,
    so that checking a request never schedules anything further """
    @wraps(cb)
    def wrapper(response):
        # iterating the full output exercises any contract assertions
        # attached to the callback; the wrapper itself returns None
        list(iterate_spider_output(cb(response)))
    return wrapper
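
# Illustrative example (hypothetical spider method): given a callback like
#
#     def parse(self, response):
#         """ @url http://example.com
#             @returns items 1
#         """
#         yield {'url': response.url}
#
# ContractsManager builds a request whose callback verifies the @url and
# @returns annotations, and _generate then wraps that callback so its output
# is discarded once the checks have run.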


class Command(ScrapyCommand):
    requires_project = True
    default_settings = {'LOG_ENABLED': False}

    def syntax(self):
        return "[options] <spider>"

    def short_desc(self):
        return "Check contracts for given spider"

    def add_options(self, parser):
        ScrapyCommand.add_options(self, parser)
        parser.add_option("-l", "--list", dest="list", action="store_true",
            help="only list contracts, without checking them")

    def run(self, args, opts):
        # load contracts
        contracts = build_component_list(
            self.settings['SPIDER_CONTRACTS_BASE'],
            self.settings['SPIDER_CONTRACTS'],
        )
        self.conman = ContractsManager([load_object(c) for c in contracts])
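
        # For reference: the stock SPIDER_CONTRACTS_BASE setting points at
        # the built-in contracts (UrlContract, ReturnsContract and
        # ScrapesContract under scrapy.contracts.default; exact paths may
        # vary across releases), while SPIDER_CONTRACTS lets a project
        # register its own contract classes.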

        # contract requests, grouped by spider name for the --list output
        contract_reqs = defaultdict(list)
        # pretend the engine always has capacity, so that every requested
        # spider can be opened in the same run
        self.crawler.engine.has_capacity = lambda: True

        # check the spiders named on the command line, or all project spiders
        for spider in args or self.crawler.spiders.list():
            spider = self.crawler.spiders.create(spider)
            requests = self.get_requests(spider)

            if opts.list:
                # only collect the callback names; nothing is crawled
                for req in requests:
                    contract_reqs[spider.name].append(req.callback.__name__)
            else:
                self.crawler.crawl(spider, requests)

        # start checks
        if opts.list:
            for spider, methods in sorted(contract_reqs.iteritems()):
                print spider
                for method in sorted(methods):
                    print ' * %s' % method
        else:
            self.crawler.start()

    def get_requests(self, spider):
        requests = []

        # every method with a docstring is a candidate: ContractsManager
        # parses the @-prefixed contract annotations out of the docstring
        # and, if it finds any, builds a request wired to that callback
        for key, value in vars(type(spider)).items():
            if callable(value) and value.__doc__:
                bound_method = value.__get__(spider, type(spider))
                request = self.conman.from_method(bound_method)

                if request:
                    # silence the callback's normal output; the contracts
                    # themselves do the checking
                    request.callback = _generate(request.callback)
                    requests.append(request)

        return requests
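
# Typical usage, assuming this module is installed as the `check` command:
#
#     scrapy check myspider    # run contract checks for one spider
#     scrapy check -l          # list annotated callbacks without crawling
#
# `myspider` is a placeholder name; with no spider arguments the command
# checks every spider in the project.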