-
Notifications
You must be signed in to change notification settings - Fork 10.4k
/
test_downloadermiddleware.py
93 lines (76 loc) · 3.44 KB
/
test_downloadermiddleware.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from twisted.trial.unittest import TestCase
from twisted.python.failure import Failure
from scrapy.http import Request, Response
from scrapy.spider import BaseSpider
from scrapy.core.downloader.middleware import DownloaderMiddlewareManager
from scrapy.utils.test import get_crawler
class ManagerTestCase(TestCase):
    """Base test case that drives a DownloaderMiddlewareManager.

    ``setUp`` builds a crawler via ``get_crawler(settings_dict)``, attaches a
    ``BaseSpider`` named ``'foo'`` to it, creates the middleware manager from
    that crawler and opens the spider on both the stats collector and the
    manager. ``tearDown`` closes them again. Tests push requests through the
    middleware chain with ``_download``.
    """

    # Handed to get_crawler() unchanged; subclasses can override it to run
    # the manager against different settings.
    settings_dict = None

    def setUp(self):
        """Create the crawler, spider and middleware manager; open the spider.

        Returns the result of ``mwman.open_spider`` to the test runner
        (presumably a Deferred that trial waits on -- confirm against the
        manager implementation).
        """
        self.crawler = get_crawler(self.settings_dict)
        self.spider = BaseSpider('foo')
        self.spider.set_crawler(self.crawler)
        self.mwman = DownloaderMiddlewareManager.from_crawler(self.crawler)
        # some mw depends on stats collector
        self.crawler.stats.open_spider(self.spider)
        return self.mwman.open_spider(self.spider)

    def tearDown(self):
        """Close the spider on the stats collector, then on the manager.

        The stats collector is closed with an empty reason string. The result
        of ``mwman.close_spider`` is returned to the test runner.
        """
        self.crawler.stats.close_spider(self.spider, '')
        return self.mwman.close_spider(self.spider)

    def _download(self, request, response=None):
        """Run ``request`` through the middleware manager and return the outcome.

        ``response`` is what the fake download function hands back to the
        manager; when it is omitted (``None``) a bare ``Response`` for
        ``request.url`` is used instead.

        Returns the value the manager's deferred fired with (a Request or a
        Response). If the deferred fired with a ``Failure``, the wrapped
        exception is re-raised.
        """
        # Identity check rather than truthiness: a response explicitly passed
        # by the caller must never be replaced, whatever it evaluates to.
        if response is None:
            response = Response(request.url)

        def download_func(**kwargs):
            # Stand-in for the real downloader: ignores whatever keyword
            # arguments it is called with and returns the canned response.
            return response

        dfd = self.mwman.download(download_func, request, self.spider)
        # catch deferred result and return the value
        results = []
        dfd.addBoth(results.append)
        # trial's private helper; ``results`` is expected to be populated
        # by the time it returns.
        self._wait(dfd)
        ret = results[0]
        if isinstance(ret, Failure):
            ret.raiseException()
        return ret
class DefaultsTest(ManagerTestCase):
    """Checks how the middleware chain behaves under the default settings."""

    def _gzip_labelled_response(self, request, status):
        """Build a response whose headers announce gzip over a plain HTML body.

        The response targets ``request.url`` and carries the given ``status``
        together with a ``Location`` header pointing at the login page.
        """
        markup = '<p>You are being redirected</p>'
        header_map = {
            'Content-Length': len(markup),
            'Content-Type': 'text/html',
            'Content-Encoding': 'gzip',
            'Location': 'http://example.com/login',
        }
        return Response(request.url, status=status, body=markup,
                        headers=header_map)

    def test_request_response(self):
        """A plain 200 response must come out of the chain as a Response."""
        request = Request('http://example.com/index.html')
        canned = Response(request.url, status=200)
        outcome = self._download(request, canned)
        self.assertTrue(isinstance(outcome, Response), "Non-response returned")

    def test_3xx_and_invalid_gzipped_body_must_redirect(self):
        """Regression test: a redirect labelled as gzip must still redirect.

        The failure occurred when the httpcompression middleware ran before
        the redirect middleware and tried to decompress a body that was not
        compressed at all. Some websites answer with a 30x response carrying
        'Content-Encoding: gzip', which used to end in:

            exceptions.IOError: Not a gzipped file
        """
        request = Request('http://example.com')
        redirect = self._gzip_labelled_response(request, 302)

        outcome = self._download(request=request, response=redirect)

        not_redirected = "Not redirected: {0!r}".format(outcome)
        self.assertTrue(isinstance(outcome, Request), not_redirected)
        expected_url = redirect.headers['Location']
        self.assertEqual(outcome.url, expected_url,
                         "Not redirected to location header")

    def test_200_and_invalid_gzipped_body_must_fail(self):
        """A 200 response labelled as gzip over a plain body raises IOError."""
        request = Request('http://example.com')
        bogus = self._gzip_labelled_response(request, 200)
        self.assertRaises(IOError, self._download,
                          request=request, response=bogus)