From 63acd0720970c87450fdbcb9aa6967118c9c1cf2 Mon Sep 17 00:00:00 2001 From: Laerte Pereira Date: Wed, 28 Feb 2024 16:14:08 -0300 Subject: [PATCH 1/2] Fix and re-enable unnecessary-comprehension and use-dict-literal pylint tags --- pylintrc | 2 -- scrapy/downloadermiddlewares/httpcompression.py | 2 +- scrapy/spiders/crawl.py | 2 +- scrapy/utils/python.py | 2 +- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pylintrc b/pylintrc index 78004e78ac8..c60e4e16a33 100644 --- a/pylintrc +++ b/pylintrc @@ -68,7 +68,6 @@ disable=abstract-method, too-many-public-methods, too-many-return-statements, unbalanced-tuple-unpacking, - unnecessary-comprehension, unnecessary-dunder-call, unnecessary-pass, unreachable, @@ -77,7 +76,6 @@ disable=abstract-method, unused-private-member, unused-variable, unused-wildcard-import, - use-dict-literal, used-before-assignment, useless-return, wildcard-import, diff --git a/scrapy/downloadermiddlewares/httpcompression.py b/scrapy/downloadermiddlewares/httpcompression.py index f0ad24f72a6..aa3abe85379 100644 --- a/scrapy/downloadermiddlewares/httpcompression.py +++ b/scrapy/downloadermiddlewares/httpcompression.py @@ -135,7 +135,7 @@ def process_response( respcls = responsetypes.from_args( headers=response.headers, url=response.url, body=decoded_body ) - kwargs = dict(cls=respcls, body=decoded_body) + kwargs = {"cls": respcls, "body": decoded_body} if issubclass(respcls, TextResponse): # force recalculating the encoding until we make sure the # responsetypes guessing is reliable diff --git a/scrapy/spiders/crawl.py b/scrapy/spiders/crawl.py index ebb4f598456..2a3913da582 100644 --- a/scrapy/spiders/crawl.py +++ b/scrapy/spiders/crawl.py @@ -85,7 +85,7 @@ def _build_request(self, rule_index, link): url=link.url, callback=self._callback, errback=self._errback, - meta=dict(rule=rule_index, link_text=link.text), + meta={"rule": rule_index, "link_text": link.text}, ) def _requests_to_follow(self, response): diff --git a/scrapy/utils/python.py b/scrapy/utils/python.py index 7b408c49cf4..1e7364e494d 100644 --- a/scrapy/utils/python.py +++ b/scrapy/utils/python.py @@ -162,7 +162,7 @@ def _chunk_iter() -> Generator[Tuple[str, int], Any, None]: pattern = re.compile(pattern) for chunk, offset in _chunk_iter(): - matches = [match for match in pattern.finditer(chunk)] + matches = list(pattern.finditer(chunk)) if matches: start, end = matches[-1].span() return offset + start, offset + end From 26a16f2c43dc96fe33d0b0fc8846402e9ae97e9a Mon Sep 17 00:00:00 2001 From: Laerte Pereira Date: Wed, 28 Feb 2024 16:36:19 -0300 Subject: [PATCH 2/2] Fix tests --- tests/test_crawl.py | 10 ++--- tests/test_downloadermiddleware_cookies.py | 2 +- tests/test_downloadermiddleware_httpauth.py | 4 +- tests/test_exporters.py | 24 +++++------ tests/test_linkextractors.py | 38 ++++++++-------- tests/test_loader_deprecated.py | 20 ++++----- tests/test_mail.py | 2 +- tests/test_pipeline_crawl.py | 2 +- tests/test_pipeline_files.py | 2 +- tests/test_pipeline_images.py | 18 ++++---- tests/test_pipeline_media.py | 48 ++++++++++----------- tests/test_scheduler.py | 26 +++++------ tests/test_spidermiddleware_offsite.py | 17 +++++--- tests/test_utils_iterators.py | 16 +++---- tests/test_utils_template.py | 2 +- 15 files changed, 117 insertions(+), 114 deletions(-) diff --git a/tests/test_crawl.py b/tests/test_crawl.py index 96d43b2b96d..6cde4ed8c50 100644 --- a/tests/test_crawl.py +++ b/tests/test_crawl.py @@ -76,11 +76,11 @@ def test_randomized_delay(self): @defer.inlineCallbacks def _test_delay(self, 
total, delay, randomize=False): - crawl_kwargs = dict( - maxlatency=delay * 2, - mockserver=self.mockserver, - total=total, - ) + crawl_kwargs = { + "maxlatency": delay * 2, + "mockserver": self.mockserver, + "total": total, + } tolerance = 1 - (0.6 if randomize else 0.2) settings = {"DOWNLOAD_DELAY": delay, "RANDOMIZE_DOWNLOAD_DELAY": randomize} diff --git a/tests/test_downloadermiddleware_cookies.py b/tests/test_downloadermiddleware_cookies.py index 4a81a638ee1..425fabcc7a8 100644 --- a/tests/test_downloadermiddleware_cookies.py +++ b/tests/test_downloadermiddleware_cookies.py @@ -320,7 +320,7 @@ def test_local_domain(self): @pytest.mark.xfail(reason="Cookie header is not currently being processed") def test_keep_cookie_from_default_request_headers_middleware(self): - DEFAULT_REQUEST_HEADERS = dict(Cookie="default=value; asdf=qwerty") + DEFAULT_REQUEST_HEADERS = {"Cookie": "default=value; asdf=qwerty"} mw_default_headers = DefaultHeadersMiddleware(DEFAULT_REQUEST_HEADERS.items()) # overwrite with values from 'cookies' request argument req1 = Request("http://example.org", cookies={"default": "something"}) diff --git a/tests/test_downloadermiddleware_httpauth.py b/tests/test_downloadermiddleware_httpauth.py index fc110e6cc2c..500af65364a 100644 --- a/tests/test_downloadermiddleware_httpauth.py +++ b/tests/test_downloadermiddleware_httpauth.py @@ -59,7 +59,7 @@ def test_auth_subdomain(self): self.assertEqual(req.headers["Authorization"], basic_auth_header("foo", "bar")) def test_auth_already_set(self): - req = Request("http://example.com/", headers=dict(Authorization="Digest 123")) + req = Request("http://example.com/", headers={"Authorization": "Digest 123"}) assert self.mw.process_request(req, self.spider) is None self.assertEqual(req.headers["Authorization"], b"Digest 123") @@ -79,6 +79,6 @@ def test_auth(self): self.assertEqual(req.headers["Authorization"], basic_auth_header("foo", "bar")) def test_auth_already_set(self): - req = Request("http://example.com/", headers=dict(Authorization="Digest 123")) + req = Request("http://example.com/", headers={"Authorization": "Digest 123"}) assert self.mw.process_request(req, self.spider) is None self.assertEqual(req.headers["Authorization"], b"Digest 123") diff --git a/tests/test_exporters.py b/tests/test_exporters.py index 59b724495d1..fa938904412 100644 --- a/tests/test_exporters.py +++ b/tests/test_exporters.py @@ -152,7 +152,7 @@ def test_invalid_option(self): def test_nested_item(self): i1 = self.item_class(name="Joseph", age="22") - i2 = dict(name="Maria", age=i1) + i2 = {"name": "Maria", "age": i1} i3 = self.item_class(name="Jesus", age=i2) ie = self._get_exporter() exported = ie.export_item(i3) @@ -185,7 +185,7 @@ def test_export_list(self): def test_export_item_dict_list(self): i1 = self.item_class(name="Joseph", age="22") - i2 = dict(name="Maria", age=[i1]) + i2 = {"name": "Maria", "age": [i1]} i3 = self.item_class(name="Jesus", age=[i2]) ie = self._get_exporter() exported = ie.export_item(i3) @@ -373,7 +373,7 @@ class TestItem2(Item): def test_join_multivalue_not_strings(self): self.assertExportResult( - item=dict(name="John", friends=[4, 8]), + item={"name": "John", "friends": [4, 8]}, include_headers_line=False, expected='"[4, 8]",John\r\n', ) @@ -388,14 +388,14 @@ def test_nonstring_types_item(self): def test_errors_default(self): with self.assertRaises(UnicodeEncodeError): self.assertExportResult( - item=dict(text="W\u0275\u200Brd"), + item={"text": "W\u0275\u200Brd"}, expected=None, encoding="windows-1251", ) def 
test_errors_xmlcharrefreplace(self): self.assertExportResult( - item=dict(text="W\u0275\u200Brd"), + item={"text": "W\u0275\u200Brd"}, include_headers_line=False, expected="Wɵ​rd\r\n", encoding="windows-1251", @@ -455,8 +455,8 @@ def test_multivalued_fields(self): ) def test_nested_item(self): - i1 = dict(name="foo\xa3hoo", age="22") - i2 = dict(name="bar", age=i1) + i1 = {"name": "foo\xa3hoo", "age": "22"} + i2 = {"name": "bar", "age": i1} i3 = self.item_class(name="buz", age=i2) self.assertExportResult( @@ -478,8 +478,8 @@ def test_nested_item(self): ) def test_nested_list_item(self): - i1 = dict(name="foo") - i2 = dict(name="bar", v2={"egg": ["spam"]}) + i1 = {"name": "foo"} + i2 = {"name": "bar", "v2": {"egg": ["spam"]}} i3 = self.item_class(name="buz", age=[i1, i2]) self.assertExportResult( @@ -534,7 +534,7 @@ def _check_output(self): def test_nested_item(self): i1 = self.item_class(name="Joseph", age="22") - i2 = dict(name="Maria", age=i1) + i2 = {"name": "Maria", "age": i1} i3 = self.item_class(name="Jesus", age=i2) self.ie.start_exporting() self.ie.export_item(i3) @@ -622,9 +622,9 @@ def test_nested_item(self): self.assertEqual(exported, [expected]) def test_nested_dict_item(self): - i1 = dict(name="Joseph\xa3", age="22") + i1 = {"name": "Joseph\xa3", "age": "22"} i2 = self.item_class(name="Maria", age=i1) - i3 = dict(name="Jesus", age=i2) + i3 = {"name": "Jesus", "age": i2} self.ie.start_exporting() self.ie.export_item(i3) self.ie.finish_exporting() diff --git a/tests/test_linkextractors.py b/tests/test_linkextractors.py index 6b4df90d888..217c7a29904 100644 --- a/tests/test_linkextractors.py +++ b/tests/test_linkextractors.py @@ -37,7 +37,7 @@ def test_extract_all_links(self): page4_url = "http://example.com/page%204.html" self.assertEqual( - [link for link in lx.extract_links(self.response)], + list(lx.extract_links(self.response)), [ Link(url="http://example.com/sample1.html", text=""), Link(url="http://example.com/sample2.html", text="sample 2"), @@ -55,7 +55,7 @@ def test_extract_all_links(self): def test_extract_filter_allow(self): lx = self.extractor_cls(allow=("sample",)) self.assertEqual( - [link for link in lx.extract_links(self.response)], + list(lx.extract_links(self.response)), [ Link(url="http://example.com/sample1.html", text=""), Link(url="http://example.com/sample2.html", text="sample 2"), @@ -70,7 +70,7 @@ def test_extract_filter_allow(self): def test_extract_filter_allow_with_duplicates(self): lx = self.extractor_cls(allow=("sample",), unique=False) self.assertEqual( - [link for link in lx.extract_links(self.response)], + list(lx.extract_links(self.response)), [ Link(url="http://example.com/sample1.html", text=""), Link(url="http://example.com/sample2.html", text="sample 2"), @@ -93,7 +93,7 @@ def test_extract_filter_allow_with_duplicates(self): def test_extract_filter_allow_with_duplicates_canonicalize(self): lx = self.extractor_cls(allow=("sample",), unique=False, canonicalize=True) self.assertEqual( - [link for link in lx.extract_links(self.response)], + list(lx.extract_links(self.response)), [ Link(url="http://example.com/sample1.html", text=""), Link(url="http://example.com/sample2.html", text="sample 2"), @@ -116,7 +116,7 @@ def test_extract_filter_allow_with_duplicates_canonicalize(self): def test_extract_filter_allow_no_duplicates_canonicalize(self): lx = self.extractor_cls(allow=("sample",), unique=True, canonicalize=True) self.assertEqual( - [link for link in lx.extract_links(self.response)], + list(lx.extract_links(self.response)), [ 
Link(url="http://example.com/sample1.html", text=""), Link(url="http://example.com/sample2.html", text="sample 2"), @@ -127,7 +127,7 @@ def test_extract_filter_allow_no_duplicates_canonicalize(self): def test_extract_filter_allow_and_deny(self): lx = self.extractor_cls(allow=("sample",), deny=("3",)) self.assertEqual( - [link for link in lx.extract_links(self.response)], + list(lx.extract_links(self.response)), [ Link(url="http://example.com/sample1.html", text=""), Link(url="http://example.com/sample2.html", text="sample 2"), @@ -137,7 +137,7 @@ def test_extract_filter_allow_and_deny(self): def test_extract_filter_allowed_domains(self): lx = self.extractor_cls(allow_domains=("google.com",)) self.assertEqual( - [link for link in lx.extract_links(self.response)], + list(lx.extract_links(self.response)), [ Link(url="http://www.google.com/something", text=""), ], @@ -148,7 +148,7 @@ def test_extraction_using_single_values(self): lx = self.extractor_cls(allow="sample") self.assertEqual( - [link for link in lx.extract_links(self.response)], + list(lx.extract_links(self.response)), [ Link(url="http://example.com/sample1.html", text=""), Link(url="http://example.com/sample2.html", text="sample 2"), @@ -162,7 +162,7 @@ def test_extraction_using_single_values(self): lx = self.extractor_cls(allow="sample", deny="3") self.assertEqual( - [link for link in lx.extract_links(self.response)], + list(lx.extract_links(self.response)), [ Link(url="http://example.com/sample1.html", text=""), Link(url="http://example.com/sample2.html", text="sample 2"), @@ -171,7 +171,7 @@ def test_extraction_using_single_values(self): lx = self.extractor_cls(allow_domains="google.com") self.assertEqual( - [link for link in lx.extract_links(self.response)], + list(lx.extract_links(self.response)), [ Link(url="http://www.google.com/something", text=""), ], @@ -179,7 +179,7 @@ def test_extraction_using_single_values(self): lx = self.extractor_cls(deny_domains="example.com") self.assertEqual( - [link for link in lx.extract_links(self.response)], + list(lx.extract_links(self.response)), [ Link(url="http://www.google.com/something", text=""), ], @@ -265,7 +265,7 @@ def test_matches(self): def test_restrict_xpaths(self): lx = self.extractor_cls(restrict_xpaths=('//div[@id="subwrapper"]',)) self.assertEqual( - [link for link in lx.extract_links(self.response)], + list(lx.extract_links(self.response)), [ Link(url="http://example.com/sample1.html", text=""), Link(url="http://example.com/sample2.html", text="sample 2"), @@ -337,7 +337,7 @@ def test_restrict_css_and_restrict_xpaths_together(self): restrict_css=("#subwrapper + a",), ) self.assertEqual( - [link for link in lx.extract_links(self.response)], + list(lx.extract_links(self.response)), [ Link(url="http://example.com/sample1.html", text=""), Link(url="http://example.com/sample2.html", text="sample 2"), @@ -705,7 +705,7 @@ def test_link_wrong_href(self): response = HtmlResponse("http://example.org/index.html", body=html) lx = self.extractor_cls() self.assertEqual( - [link for link in lx.extract_links(response)], + list(lx.extract_links(response)), [ Link( url="http://example.org/item1.html", @@ -758,7 +758,7 @@ def test_link_wrong_href(self): response = HtmlResponse("http://example.org/index.html", body=html) lx = self.extractor_cls() self.assertEqual( - [link for link in lx.extract_links(response)], + list(lx.extract_links(response)), [ Link( url="http://example.org/item1.html", text="Item 1", nofollow=False @@ -779,7 +779,7 @@ def test_link_restrict_text(self): # Simple text 
inclusion test lx = self.extractor_cls(restrict_text="dog") self.assertEqual( - [link for link in lx.extract_links(response)], + list(lx.extract_links(response)), [ Link( url="http://example.org/item2.html", @@ -791,7 +791,7 @@ def test_link_restrict_text(self): # Unique regex test lx = self.extractor_cls(restrict_text=r"of.*dog") self.assertEqual( - [link for link in lx.extract_links(response)], + list(lx.extract_links(response)), [ Link( url="http://example.org/item2.html", @@ -803,7 +803,7 @@ def test_link_restrict_text(self): # Multiple regex test lx = self.extractor_cls(restrict_text=[r"of.*dog", r"of.*cat"]) self.assertEqual( - [link for link in lx.extract_links(response)], + list(lx.extract_links(response)), [ Link( url="http://example.org/item1.html", @@ -834,7 +834,7 @@ def test_skip_bad_links(self): response = HtmlResponse("http://example.org/index.html", body=html) lx = self.extractor_cls() self.assertEqual( - [link for link in lx.extract_links(response)], + list(lx.extract_links(response)), [ Link( url="http://example.org/item2.html", diff --git a/tests/test_loader_deprecated.py b/tests/test_loader_deprecated.py index 99cdf88d96f..528efa142a7 100644 --- a/tests/test_loader_deprecated.py +++ b/tests/test_loader_deprecated.py @@ -565,37 +565,37 @@ class NoInputReprocessingFromDictTest(unittest.TestCase): """ def test_avoid_reprocessing_with_initial_values_single(self): - il = NoInputReprocessingDictLoader(item=dict(title="foo")) + il = NoInputReprocessingDictLoader(item={"title": "foo"}) il_loaded = il.load_item() - self.assertEqual(il_loaded, dict(title="foo")) + self.assertEqual(il_loaded, {"title": "foo"}) self.assertEqual( - NoInputReprocessingDictLoader(item=il_loaded).load_item(), dict(title="foo") + NoInputReprocessingDictLoader(item=il_loaded).load_item(), {"title": "foo"} ) def test_avoid_reprocessing_with_initial_values_list(self): - il = NoInputReprocessingDictLoader(item=dict(title=["foo", "bar"])) + il = NoInputReprocessingDictLoader(item={"title": ["foo", "bar"]}) il_loaded = il.load_item() - self.assertEqual(il_loaded, dict(title="foo")) + self.assertEqual(il_loaded, {"title": "foo"}) self.assertEqual( - NoInputReprocessingDictLoader(item=il_loaded).load_item(), dict(title="foo") + NoInputReprocessingDictLoader(item=il_loaded).load_item(), {"title": "foo"} ) def test_avoid_reprocessing_without_initial_values_single(self): il = NoInputReprocessingDictLoader() il.add_value("title", "foo") il_loaded = il.load_item() - self.assertEqual(il_loaded, dict(title="FOO")) + self.assertEqual(il_loaded, {"title": "FOO"}) self.assertEqual( - NoInputReprocessingDictLoader(item=il_loaded).load_item(), dict(title="FOO") + NoInputReprocessingDictLoader(item=il_loaded).load_item(), {"title": "FOO"} ) def test_avoid_reprocessing_without_initial_values_list(self): il = NoInputReprocessingDictLoader() il.add_value("title", ["foo", "bar"]) il_loaded = il.load_item() - self.assertEqual(il_loaded, dict(title="FOO")) + self.assertEqual(il_loaded, {"title": "FOO"}) self.assertEqual( - NoInputReprocessingDictLoader(item=il_loaded).load_item(), dict(title="FOO") + NoInputReprocessingDictLoader(item=il_loaded).load_item(), {"title": "FOO"} ) diff --git a/tests/test_mail.py b/tests/test_mail.py index 2535e58db26..ff15053978a 100644 --- a/tests/test_mail.py +++ b/tests/test_mail.py @@ -91,7 +91,7 @@ def test_send_attach(self): self.assertEqual(attach.get_payload(decode=True), b"content") def _catch_mail_sent(self, **kwargs): - self.catched_msg = dict(**kwargs) + self.catched_msg = {**kwargs} def 
test_send_utf8(self): subject = "sübjèçt" diff --git a/tests/test_pipeline_crawl.py b/tests/test_pipeline_crawl.py index be9811980df..5a9a217cee3 100644 --- a/tests/test_pipeline_crawl.py +++ b/tests/test_pipeline_crawl.py @@ -140,7 +140,7 @@ def _assert_files_download_failure(self, crawler, items, code, logs): self.assertEqual(logs.count(file_dl_failure), 3) # check that no files were written to the media store - self.assertEqual([x for x in self.tmpmediastore.iterdir()], []) + self.assertEqual(list(self.tmpmediastore.iterdir()), []) @defer.inlineCallbacks def test_download_media(self): diff --git a/tests/test_pipeline_files.py b/tests/test_pipeline_files.py index e7000e3140c..0babde4d90f 100644 --- a/tests/test_pipeline_files.py +++ b/tests/test_pipeline_files.py @@ -221,7 +221,7 @@ def file_path(self, request, response=None, info=None, item=None): file_path = CustomFilesPipeline.from_settings( Settings({"FILES_STORE": self.tempdir}) ).file_path - item = dict(path="path-to-store-file") + item = {"path": "path-to-store-file"} request = Request("http://example.com") self.assertEqual(file_path(request, item=item), "full/path-to-store-file") diff --git a/tests/test_pipeline_images.py b/tests/test_pipeline_images.py index 2e2e06b89a9..18a2454b3db 100644 --- a/tests/test_pipeline_images.py +++ b/tests/test_pipeline_images.py @@ -132,7 +132,7 @@ def thumb_path( thumb_path = CustomImagesPipeline.from_settings( Settings({"IMAGES_STORE": self.tempdir}) ).thumb_path - item = dict(path="path-to-store-file") + item = {"path": "path-to-store-file"} request = Request("http://example.com") self.assertEqual( thumb_path(request, "small", item=item), "thumb/small/path-to-store-file" @@ -433,14 +433,14 @@ class ImagesPipelineTestCaseCustomSettings(unittest.TestCase): ] # This should match what is defined in ImagesPipeline. 
- default_pipeline_settings = dict( - MIN_WIDTH=0, - MIN_HEIGHT=0, - EXPIRES=90, - THUMBS={}, - IMAGES_URLS_FIELD="image_urls", - IMAGES_RESULT_FIELD="images", - ) + default_pipeline_settings = { + "MIN_WIDTH": 0, + "MIN_HEIGHT": 0, + "EXPIRES": 90, + "THUMBS": {}, + "IMAGES_URLS_FIELD": "image_urls", + "IMAGES_RESULT_FIELD": "images", + } def setUp(self): self.tempdir = mkdtemp() diff --git a/tests/test_pipeline_media.py b/tests/test_pipeline_media.py index d477b59be40..d4dde4a4036 100644 --- a/tests/test_pipeline_media.py +++ b/tests/test_pipeline_media.py @@ -59,7 +59,7 @@ def test_default_media_to_download(self): assert self.pipe.media_to_download(request, self.info) is None def test_default_get_media_requests(self): - item = dict(name="name") + item = {"name": "name"} assert self.pipe.get_media_requests(item, self.info) is None def test_default_media_downloaded(self): @@ -73,7 +73,7 @@ def test_default_media_failed(self): assert self.pipe.media_failed(fail, request, self.info) is fail def test_default_item_completed(self): - item = dict(name="name") + item = {"name": "name"} assert self.pipe.item_completed([], item, self.info) is item # Check that failures are logged by default @@ -98,7 +98,7 @@ def test_default_item_completed(self): @inlineCallbacks def test_default_process_item(self): - item = dict(name="name") + item = {"name": "name"} new_item = yield self.pipe.process_item(item, self.spider) assert new_item is item @@ -226,11 +226,11 @@ def test_result_succeed(self): rsp = Response("http://url1") req = Request( "http://url1", - meta=dict(response=rsp), + meta={"response": rsp}, callback=self._callback, errback=self._errback, ) - item = dict(requests=req) + item = {"requests": req} new_item = yield self.pipe.process_item(item, self.spider) self.assertEqual(new_item["results"], [(True, rsp)]) self.assertEqual( @@ -250,11 +250,11 @@ def test_result_failure(self): fail = Failure(Exception()) req = Request( "http://url1", - meta=dict(response=fail), + meta={"response": fail}, callback=self._callback, errback=self._errback, ) - item = dict(requests=req) + item = {"requests": req} new_item = yield self.pipe.process_item(item, self.spider) self.assertEqual(new_item["results"], [(False, fail)]) self.assertEqual( @@ -272,10 +272,10 @@ def test_result_failure(self): def test_mix_of_success_and_failure(self): self.pipe.LOG_FAILED_RESULTS = False rsp1 = Response("http://url1") - req1 = Request("http://url1", meta=dict(response=rsp1)) + req1 = Request("http://url1", meta={"response": rsp1}) fail = Failure(Exception()) - req2 = Request("http://url2", meta=dict(response=fail)) - item = dict(requests=[req1, req2]) + req2 = Request("http://url2", meta={"response": fail}) + item = {"requests": [req1, req2]} new_item = yield self.pipe.process_item(item, self.spider) self.assertEqual(new_item["results"], [(True, rsp1), (False, fail)]) m = self.pipe._mockcalled @@ -294,7 +294,7 @@ def test_mix_of_success_and_failure(self): def test_get_media_requests(self): # returns single Request (without callback) req = Request("http://url") - item = dict(requests=req) # pass a single item + item = {"requests": req} # pass a single item new_item = yield self.pipe.process_item(item, self.spider) assert new_item is item self.assertIn(self.fingerprint(req), self.info.downloaded) @@ -302,7 +302,7 @@ def test_get_media_requests(self): # returns iterable of Requests req1 = Request("http://url1") req2 = Request("http://url2") - item = dict(requests=iter([req1, req2])) + item = {"requests": iter([req1, req2])} new_item = 
yield self.pipe.process_item(item, self.spider) assert new_item is item assert self.fingerprint(req1) in self.info.downloaded @@ -311,17 +311,17 @@ def test_get_media_requests(self): @inlineCallbacks def test_results_are_cached_across_multiple_items(self): rsp1 = Response("http://url1") - req1 = Request("http://url1", meta=dict(response=rsp1)) - item = dict(requests=req1) + req1 = Request("http://url1", meta={"response": rsp1}) + item = {"requests": req1} new_item = yield self.pipe.process_item(item, self.spider) self.assertTrue(new_item is item) self.assertEqual(new_item["results"], [(True, rsp1)]) # rsp2 is ignored, rsp1 must be in results because request fingerprints are the same req2 = Request( - req1.url, meta=dict(response=Response("http://donot.download.me")) + req1.url, meta={"response": Response("http://donot.download.me")} ) - item = dict(requests=req2) + item = {"requests": req2} new_item = yield self.pipe.process_item(item, self.spider) self.assertTrue(new_item is item) self.assertEqual(self.fingerprint(req1), self.fingerprint(req2)) @@ -330,11 +330,11 @@ def test_results_are_cached_across_multiple_items(self): @inlineCallbacks def test_results_are_cached_for_requests_of_single_item(self): rsp1 = Response("http://url1") - req1 = Request("http://url1", meta=dict(response=rsp1)) + req1 = Request("http://url1", meta={"response": rsp1}) req2 = Request( - req1.url, meta=dict(response=Response("http://donot.download.me")) + req1.url, meta={"response": Response("http://donot.download.me")} ) - item = dict(requests=[req1, req2]) + item = {"requests": [req1, req2]} new_item = yield self.pipe.process_item(item, self.spider) self.assertTrue(new_item is item) self.assertEqual(new_item["results"], [(True, rsp1), (True, rsp1)]) @@ -359,16 +359,16 @@ def rsp1_func(): def rsp2_func(): self.fail("it must cache rsp1 result and must not try to redownload") - req1 = Request("http://url", meta=dict(response=rsp1_func)) - req2 = Request(req1.url, meta=dict(response=rsp2_func)) - item = dict(requests=[req1, req2]) + req1 = Request("http://url", meta={"response": rsp1_func}) + req2 = Request(req1.url, meta={"response": rsp2_func}) + item = {"requests": [req1, req2]} new_item = yield self.pipe.process_item(item, self.spider) self.assertEqual(new_item["results"], [(True, rsp1), (True, rsp1)]) @inlineCallbacks def test_use_media_to_download_result(self): - req = Request("http://url", meta=dict(result="ITSME", response=self.fail)) - item = dict(requests=req) + req = Request("http://url", meta={"result": "ITSME", "response": self.fail}) + item = {"requests": req} new_item = yield self.pipe.process_item(item, self.spider) self.assertEqual(new_item["results"], [(True, "ITSME")]) self.assertEqual( diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index f8465a5ffed..37099dae676 100644 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -45,15 +45,15 @@ def close(self): class MockCrawler(Crawler): def __init__(self, priority_queue_cls, jobdir): - settings = dict( - SCHEDULER_DEBUG=False, - SCHEDULER_DISK_QUEUE="scrapy.squeues.PickleLifoDiskQueue", - SCHEDULER_MEMORY_QUEUE="scrapy.squeues.LifoMemoryQueue", - SCHEDULER_PRIORITY_QUEUE=priority_queue_cls, - JOBDIR=jobdir, - DUPEFILTER_CLASS="scrapy.dupefilters.BaseDupeFilter", - REQUEST_FINGERPRINTER_IMPLEMENTATION="2.7", - ) + settings = { + "SCHEDULER_DEBUG": False, + "SCHEDULER_DISK_QUEUE": "scrapy.squeues.PickleLifoDiskQueue", + "SCHEDULER_MEMORY_QUEUE": "scrapy.squeues.LifoMemoryQueue", + "SCHEDULER_PRIORITY_QUEUE": priority_queue_cls, 
+ "JOBDIR": jobdir, + "DUPEFILTER_CLASS": "scrapy.dupefilters.BaseDupeFilter", + "REQUEST_FINGERPRINTER_IMPLEMENTATION": "2.7", + } super().__init__(Spider, settings) self.engine = MockEngine(downloader=MockDownloader()) self.stats = load_object(self.settings["STATS_CLASS"])(self) @@ -338,10 +338,10 @@ def test_integration_downloader_aware_priority_queue(self): class TestIncompatibility(unittest.TestCase): def _incompatible(self): - settings = dict( - SCHEDULER_PRIORITY_QUEUE="scrapy.pqueues.DownloaderAwarePriorityQueue", - CONCURRENT_REQUESTS_PER_IP=1, - ) + settings = { + "SCHEDULER_PRIORITY_QUEUE": "scrapy.pqueues.DownloaderAwarePriorityQueue", + "CONCURRENT_REQUESTS_PER_IP": 1, + } crawler = get_crawler(Spider, settings) scheduler = Scheduler.from_crawler(crawler) spider = Spider(name="spider") diff --git a/tests/test_spidermiddleware_offsite.py b/tests/test_spidermiddleware_offsite.py index ea45b769869..837f1c2c8f5 100644 --- a/tests/test_spidermiddleware_offsite.py +++ b/tests/test_spidermiddleware_offsite.py @@ -16,10 +16,10 @@ def setUp(self): self.mw.spider_opened(self.spider) def _get_spiderargs(self): - return dict( - name="foo", - allowed_domains=["scrapytest.org", "scrapy.org", "scrapy.test.org"], - ) + return { + "name": "foo", + "allowed_domains": ["scrapytest.org", "scrapy.org", "scrapy.test.org"], + } def test_process_spider_output(self): res = Response("http://scrapytest.org") @@ -50,7 +50,7 @@ def test_process_spider_output(self): class TestOffsiteMiddleware2(TestOffsiteMiddleware): def _get_spiderargs(self): - return dict(name="foo", allowed_domains=None) + return {"name": "foo", "allowed_domains": None} def test_process_spider_output(self): res = Response("http://scrapytest.org") @@ -61,13 +61,16 @@ def test_process_spider_output(self): class TestOffsiteMiddleware3(TestOffsiteMiddleware2): def _get_spiderargs(self): - return dict(name="foo") + return {"name": "foo"} class TestOffsiteMiddleware4(TestOffsiteMiddleware3): def _get_spiderargs(self): bad_hostname = urlparse("http:////scrapytest.org").hostname - return dict(name="foo", allowed_domains=["scrapytest.org", None, bad_hostname]) + return { + "name": "foo", + "allowed_domains": ["scrapytest.org", None, bad_hostname], + } def test_process_spider_output(self): res = Response("http://scrapytest.org") diff --git a/tests/test_utils_iterators.py b/tests/test_utils_iterators.py index ee22e6675d3..ec377bb19ad 100644 --- a/tests/test_utils_iterators.py +++ b/tests/test_utils_iterators.py @@ -355,7 +355,7 @@ def test_csviter_defaults(self): response = TextResponse(url="http://example.com/", body=body) csv = csviter(response) - result = [row for row in csv] + result = list(csv) self.assertEqual( result, [ @@ -377,7 +377,7 @@ def test_csviter_delimiter(self): csv = csviter(response, delimiter="\t") self.assertEqual( - [row for row in csv], + list(csv), [ {"id": "1", "name": "alpha", "value": "foobar"}, {"id": "2", "name": "unicode", "value": "\xfan\xedc\xf3d\xe9\u203d"}, @@ -394,7 +394,7 @@ def test_csviter_quotechar(self): csv1 = csviter(response1, quotechar="'") self.assertEqual( - [row for row in csv1], + list(csv1), [ {"id": "1", "name": "alpha", "value": "foobar"}, {"id": "2", "name": "unicode", "value": "\xfan\xedc\xf3d\xe9\u203d"}, @@ -407,7 +407,7 @@ def test_csviter_quotechar(self): csv2 = csviter(response2, delimiter="|", quotechar="'") self.assertEqual( - [row for row in csv2], + list(csv2), [ {"id": "1", "name": "alpha", "value": "foobar"}, {"id": "2", "name": "unicode", "value": "\xfan\xedc\xf3d\xe9\u203d"}, @@ 
-422,7 +422,7 @@ def test_csviter_wrong_quotechar(self): csv = csviter(response) self.assertEqual( - [row for row in csv], + list(csv), [ {"'id'": "1", "'name'": "'alpha'", "'value'": "'foobar'"}, { @@ -441,7 +441,7 @@ def test_csviter_delimiter_binary_response_assume_utf8_encoding(self): csv = csviter(response, delimiter="\t") self.assertEqual( - [row for row in csv], + list(csv), [ {"id": "1", "name": "alpha", "value": "foobar"}, {"id": "2", "name": "unicode", "value": "\xfan\xedc\xf3d\xe9\u203d"}, @@ -458,7 +458,7 @@ def test_csviter_headers(self): csv = csviter(response, headers=[h.decode("utf-8") for h in headers]) self.assertEqual( - [row for row in csv], + list(csv), [ {"id": "1", "name": "alpha", "value": "foobar"}, {"id": "2", "name": "unicode", "value": "\xfan\xedc\xf3d\xe9\u203d"}, @@ -475,7 +475,7 @@ def test_csviter_falserow(self): csv = csviter(response) self.assertEqual( - [row for row in csv], + list(csv), [ {"id": "1", "name": "alpha", "value": "foobar"}, {"id": "2", "name": "unicode", "value": "\xfan\xedc\xf3d\xe9\u203d"}, diff --git a/tests/test_utils_template.py b/tests/test_utils_template.py index cbe80e157d1..fc42c0d2f4d 100644 --- a/tests/test_utils_template.py +++ b/tests/test_utils_template.py @@ -16,7 +16,7 @@ def tearDown(self): rmtree(self.tmp_path) def test_simple_render(self): - context = dict(project_name="proj", name="spi", classname="TheSpider") + context = {"project_name": "proj", "name": "spi", "classname": "TheSpider"} template = "from ${project_name}.spiders.${name} import ${classname}" rendered = "from proj.spiders.spi import TheSpider"
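
For context, here is a minimal standalone sketch of the two pylint checks this patch re-enables. It uses hypothetical example values and is not code from the Scrapy tree:

    import re

    # unnecessary-comprehension (R1721): a comprehension that only copies an
    # iterable element-for-element; list() states that intent directly.
    pattern = re.compile(r"\d+")
    matches = [m for m in pattern.finditer("a1 b22")]  # flagged by pylint
    matches = list(pattern.finditer("a1 b22"))         # preferred

    # use-dict-literal (R1735): dict(key=value) incurs a name lookup and a
    # function call, and its keyword arguments must be valid identifiers;
    # a dict literal has neither limitation.
    kwargs = dict(cls=str, body=b"data")      # flagged by pylint
    kwargs = {"cls": str, "body": b"data"}    # preferred

Both rewrites are behavior-preserving, which is why the patch can apply them mechanically across the test suite without changing what any test asserts.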