Skip to content

Commit

Permalink
Merge pull request #1447 from jdemaeyer/fix/redirectmiddleware-respect-meta-attributes
Browse files Browse the repository at this point in the history

[MRG +1] Fix RedirectMiddleware not honouring handle_httpstatus meta keys
  • Loading branch information
dangra committed Aug 22, 2015
2 parents 280eab2 + d164398 commit 489c76b
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 1 deletion.
6 changes: 6 additions & 0 deletions docs/topics/downloader-middleware.rst
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,12 @@ responses (and pass them through to your spider) you can do this::
class MySpider(CrawlSpider):
handle_httpstatus_list = [301, 302]

The ``handle_httpstatus_list`` key of :attr:`Request.meta
<scrapy.http.Request.meta>` can also be used to specify which response codes to
allow on a per-request basis. You can also set the meta key
``handle_httpstatus_all`` to ``True`` if you want to allow any response code
for a request.


RedirectMiddleware settings
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
4 changes: 3 additions & 1 deletion scrapy/downloadermiddlewares/redirect.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ class RedirectMiddleware(BaseRedirectMiddleware):

def process_response(self, request, response, spider):
if (request.meta.get('dont_redirect', False) or
response.status in getattr(spider, 'handle_httpstatus_list', [])):
response.status in getattr(spider, 'handle_httpstatus_list', []) or
response.status in request.meta.get('handle_httpstatus_list', []) or
request.meta.get('handle_httpstatus_all', False)):
return response

if request.method == 'HEAD':
Expand Down
11 changes: 11 additions & 0 deletions tests/test_downloadermiddleware_redirect.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,17 @@ def test_spider_handling(self):
r = self.mw.process_response(req, rsp, smartspider)
self.assertIs(r, rsp)

def test_request_meta_handling(self):
    """Check that request meta keys can disable redirect handling.

    A 301 response carrying a ``Location`` header must be returned by the
    middleware unchanged when the request's meta either lists the status
    in ``handle_httpstatus_list`` or sets ``handle_httpstatus_all``.
    """
    source_url = 'http://www.example.com/301'
    target_url = 'http://www.example.com/redirected'
    passthrough_metas = (
        {'handle_httpstatus_list': [404, 301, 302]},
        {'handle_httpstatus_all': True},
    )
    for meta in passthrough_metas:
        request = Request(source_url, meta=meta)
        response = Response(source_url, headers={'Location': target_url},
                            status=301, request=request)
        result = self.mw.process_response(request, response, self.spider)
        # Pass-through means the very same response object comes back,
        # not a new redirect request.
        self.assertIs(result, response)


class MetaRefreshMiddlewareTest(unittest.TestCase):

Expand Down

0 comments on commit 489c76b

Please sign in to comment.